diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index fddb28f..1b393cb 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -20,7 +20,7 @@ jobs: language: c++ fuzz-seconds: 600 - name: Upload Crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 0ec6d70..0883029 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -2,22 +2,100 @@ name: CMake on: push: - branches: [ "main" ] pull_request: - branches: [ "main" ] env: BUILD_TYPE: Release jobs: build: - runs-on: ubuntu-latest + strategy: + matrix: + include: + - name: ubuntu-latest-gcc-cmake + os: ubuntu-latest + cc: gcc + cxx: g++ + build-system: cmake + cmake-opts: '' + + - name: ubuntu-latest-clang-cmake + os: ubuntu-latest + cc: clang + cxx: clang++ + build-system: cmake + cmake-opts: '' + + - name: ubuntu-24.04-arm-clang-cmake + os: ubuntu-24.04-arm + cc: clang + cxx: clang++ + build-system: cmake + cmake-opts: '' + + - name: ubuntu-latest-clang-cmake-asan-fuzzer + os: ubuntu-latest + cc: clang + cxx: clang++ + build-system: cmake + cmake-opts: '-DSANITIZE=fuzzer-no-link,address' + + - name: ubuntu-latest-clang-cmake-ninja + os: ubuntu-latest + cc: clang + cxx: clang++ + build-system: cmake + cmake-opts: '-G Ninja' + + - name: macos-latest-clang-cmake + os: macos-latest + cc: clang + cxx: clang++ + build-system: cmake + cmake-opts: '' + + - name: ubuntu-latest-cross-aarch64-cmake + os: ubuntu-latest + cc: aarch64-linux-gnu-gcc + cxx: aarch64-linux-gnu-g++ + build-system: cmake + cmake-opts: '-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64_toolchain.cmake' + + - name: ubuntu-latest-cross-aarch32-cmake + os: ubuntu-latest + cc: arm-linux-gnueabihf-gcc + cxx: arm-linux-gnueabihf-g++ + build-system: cmake + cmake-opts: 
'-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch32_toolchain.cmake' + + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + + - name: Install Linux dependencies + if: startsWith(matrix.os,'ubuntu') && contains(matrix.cmake-opts,'-G Ninja') + run: | + sudo apt-get update + sudo apt-get install -y ninja-build + + - name: Install cross-aarch64 dependencies + if: startsWith(matrix.os,'ubuntu') && contains(matrix.cmake-opts,'aarch64') + run: | + sudo apt-get update + sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu + + - name: Install cross-arm dependencies + if: startsWith(matrix.os,'ubuntu') && contains(matrix.cmake-opts,'aarch32') + run: | + sudo apt-get update + sudo apt-get install gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf - name: Configure CMake - run: cmake -B ${{github.workspace}}/out -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ + env: + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} + run: cmake -B ${{github.workspace}}/out -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} ${{ matrix.cmake-opts }} - name: Build run: cmake --build ${{github.workspace}}/out --config ${{env.BUILD_TYPE}} diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..9e7a00e --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Native", + "includePath": [ + "${workspaceFolder}/**", + "${workspaceFolder}/common", + "${workspaceFolder}/decoder", + "${workspaceFolder}/encoder" + ], + "defines": [], + "cStandard": "c17", + "cppStandard": "c++17", + "configurationProvider": "ms-vscode.cmake-tools" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..d72be95 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,81 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Run hevcenc - Linux", + 
"type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/hevcenc", + "args": ["../test/encoder/vid_enc_cfg.txt"], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] + }, + { + "name": "Run hevcdec - Linux", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/hevcdec", + "args": ["../test/decoder/test.cfg"], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] + }, + { + "name": "Run hevcenc - Mac", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/hevcenc", + "args": ["../test/encoder/vid_enc_cfg.txt"], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build", + "environment": [], + "externalConsole": false, + "MIMode": "lldb", + "setupCommands": [ + { + "description": "Enable pretty-printing for lldb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] + }, + { + "name": "Run hevcdec - Mac", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/hevcdec", + "args": ["../test/decoder/test.cfg"], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build", + "environment": [], + "externalConsole": false, + "MIMode": "lldb", + "setupCommands": [ + { + "description": "Enable pretty-printing for lldb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..5b06b51 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,19 @@ +{ + "cmake.buildDirectory": 
"${workspaceFolder}/build", + "cmake.sourceDirectory": "${workspaceFolder}", + "cmake.configureArgs": [ + "-DENABLE_MVC=OFF", + "-DENABLE_SVC=OFF", + "-DENABLE_TESTS=OFF", + "-DCMAKE_C_COMPILER=clang", + "-DCMAKE_CXX_COMPILER=clang++" + ], + "cmake.preferredGenerators": [ + "Unix Makefiles" + ], + "cmake.debugConfig": { + "hevcenc": "hevcenc", + "hevcdec": "hevcdec" + }, + "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools" +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..646fbf8 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,29 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cmake", + "label": "Configure", + "command": "configure", + "problemMatcher": [ + "$gcc" + ], + "group": "build" + }, + { + "type": "cmake", + "label": "Build", + "command": "build", + "problemMatcher": [ + "$gcc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "dependsOn": [ + "Configure" + ] + } + ] +} \ No newline at end of file diff --git a/Android.bp b/Android.bp index a7ca755..b5a0580 100644 --- a/Android.bp +++ b/Android.bp @@ -35,23 +35,23 @@ cc_library_headers { cc_library_static { name: "libhevcdec", + defaults: ["no_bti"], vendor_available: true, host_supported: true, cflags: [ "-D_LIB", - "-DMULTICORE", "-fPIC", + "-DENABLE_MAIN_REXT_PROFILE", "-O3", "-DANDROID", + "-DDISABLE_SEI", "-Wall", "-Werror", // common/x86/ihevc_sao_ssse3_intr.c: implicit conversion from // 'int' to 'char' changes value from 128 to -128 "-Wno-error=constant-conversion", - // #KEEP_THREAD_ACTIVE is experimental - "-UKEEP_THREADS_ACTIVE", ], export_include_dirs: [ @@ -60,64 +60,66 @@ cc_library_static { ], srcs: [ - "common/ihevc_quant_tables.c", - "common/ihevc_inter_pred_filters.c", - "common/ihevc_weighted_pred.c", - "common/ihevc_padding.c", - "common/ihevc_deblk_edge_filter.c", - "common/ihevc_deblk_tables.c", + "common/ihevc_buf_mgr.c", "common/ihevc_cabac_tables.c", - 
"common/ihevc_common_tables.c", - "common/ihevc_intra_pred_filters.c", "common/ihevc_chroma_intra_pred_filters.c", - "common/ihevc_mem_fns.c", - "common/ihevc_sao.c", - "common/ihevc_trans_tables.c", - "common/ihevc_recon.c", - "common/ihevc_itrans.c", - "common/ihevc_itrans_recon.c", - "common/ihevc_iquant_recon.c", - "common/ihevc_iquant_itrans_recon.c", - "common/ihevc_itrans_recon_32x32.c", - "common/ihevc_itrans_recon_16x16.c", - "common/ihevc_itrans_recon_8x8.c", - "common/ihevc_chroma_itrans_recon.c", - "common/ihevc_chroma_iquant_recon.c", "common/ihevc_chroma_iquant_itrans_recon.c", - "common/ihevc_chroma_recon.c", + "common/ihevc_chroma_iquant_recon.c", + "common/ihevc_chroma_itrans_recon.c", + "common/ihevc_chroma_itrans_recon_32x32.c", "common/ihevc_chroma_itrans_recon_16x16.c", "common/ihevc_chroma_itrans_recon_8x8.c", - "common/ihevc_buf_mgr.c", + "common/ihevc_chroma_recon.c", + "common/ihevc_common_tables.c", + "common/ihevc_deblk_edge_filter.c", + "common/ihevc_deblk_tables.c", "common/ihevc_disp_mgr.c", "common/ihevc_dpb_mgr.c", + "common/ihevc_inter_pred_filters.c", + "common/ihevc_intra_pred_filters.c", + "common/ihevc_iquant_itrans_recon.c", + "common/ihevc_iquant_recon.c", + "common/ihevc_itrans.c", + "common/ihevc_itrans_res.c", + "common/ihevc_itrans_recon.c", + "common/ihevc_itrans_recon_16x16.c", + "common/ihevc_itrans_recon_32x32.c", + "common/ihevc_itrans_recon_8x8.c", + "common/ihevc_mem_fns.c", + "common/ihevc_padding.c", + "common/ihevc_quant_tables.c", + "common/ihevc_recon.c", + "common/ihevc_sao.c", + "common/ihevc_trans_tables.c", + "common/ihevc_weighted_pred.c", "common/ithread.c", - "decoder/ihevcd_version.c", "decoder/ihevcd_api.c", - "decoder/ihevcd_decode.c", - "decoder/ihevcd_nal.c", "decoder/ihevcd_bitstream.c", - "decoder/ihevcd_parse_headers.c", - "decoder/ihevcd_parse_slice_header.c", - "decoder/ihevcd_parse_slice.c", - "decoder/ihevcd_parse_residual.c", + "decoder/ihevcd_boundary_strength.c", "decoder/ihevcd_cabac.c", 
- "decoder/ihevcd_intra_pred_mode_prediction.c", - "decoder/ihevcd_process_slice.c", - "decoder/ihevcd_utils.c", - "decoder/ihevcd_job_queue.c", - "decoder/ihevcd_ref_list.c", + "decoder/ihevcd_common_tables.c", + "decoder/ihevcd_deblk.c", + "decoder/ihevcd_decode.c", + "decoder/ihevcd_fmt_conv.c", "decoder/ihevcd_get_mv.c", - "decoder/ihevcd_mv_pred.c", - "decoder/ihevcd_mv_merge.c", + "decoder/ihevcd_ilf_padding.c", + "decoder/ihevcd_inter_pred.c", + "decoder/ihevcd_intra_pred_mode_prediction.c", "decoder/ihevcd_iquant_itrans_recon_ctb.c", "decoder/ihevcd_itrans_recon_dc.c", - "decoder/ihevcd_common_tables.c", - "decoder/ihevcd_boundary_strength.c", - "decoder/ihevcd_deblk.c", - "decoder/ihevcd_inter_pred.c", + "decoder/ihevcd_job_queue.c", + "decoder/ihevcd_mv_merge.c", + "decoder/ihevcd_mv_pred.c", + "decoder/ihevcd_nal.c", + "decoder/ihevcd_parse_headers.c", + "decoder/ihevcd_parse_residual.c", + "decoder/ihevcd_parse_slice.c", + "decoder/ihevcd_parse_slice_header.c", + "decoder/ihevcd_process_slice.c", + "decoder/ihevcd_ref_list.c", "decoder/ihevcd_sao.c", - "decoder/ihevcd_ilf_padding.c", - "decoder/ihevcd_fmt_conv.c", + "decoder/ihevcd_utils.c", + "decoder/ihevcd_version.c", ], arch: { @@ -131,29 +133,64 @@ cc_library_static { "-DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC", ], local_include_dirs: [ - "decoder/arm", "common/arm", - "decoder/arm64", "common/arm64", + "decoder/arm", + "decoder/arm64", ], srcs: [ - "decoder/arm/ihevcd_function_selector.c", - "decoder/arm/ihevcd_function_selector_noneon.c", - "decoder/arm64/ihevcd_function_selector_av8.c", "common/arm/ihevc_intra_pred_filters_neon_intr.c", "common/arm/ihevc_weighted_pred_neon_intr.c", - "common/arm64/ihevc_mem_fns.s", - "common/arm64/ihevc_itrans_recon_32x32.s", - "common/arm64/ihevc_weighted_pred_bi_default.s", - "common/arm64/ihevc_weighted_pred_bi.s", - "common/arm64/ihevc_weighted_pred_uni.s", + "common/arm64/ihevc_deblk_chroma_horz.s", + "common/arm64/ihevc_deblk_chroma_vert.s", 
"common/arm64/ihevc_deblk_luma_horz.s", "common/arm64/ihevc_deblk_luma_vert.s", - "common/arm64/ihevc_deblk_chroma_vert.s", - "common/arm64/ihevc_deblk_chroma_horz.s", - "common/arm64/ihevc_sao_band_offset_luma.s", + "common/arm64/ihevc_inter_pred_chroma_copy.s", + "common/arm64/ihevc_inter_pred_chroma_copy_w16out.s", + "common/arm64/ihevc_inter_pred_chroma_horz.s", + "common/arm64/ihevc_inter_pred_chroma_horz_w16out.s", + "common/arm64/ihevc_inter_pred_chroma_vert.s", + "common/arm64/ihevc_inter_pred_chroma_vert_w16inp.s", + "common/arm64/ihevc_inter_pred_chroma_vert_w16inp_w16out.s", + "common/arm64/ihevc_inter_pred_chroma_vert_w16out.s", + "common/arm64/ihevc_inter_pred_filters_luma_horz.s", + "common/arm64/ihevc_inter_pred_filters_luma_vert.s", + "common/arm64/ihevc_inter_pred_filters_luma_vert_w16inp.s", + "common/arm64/ihevc_inter_pred_filters_luma_vert_w16out.s", + "common/arm64/ihevc_inter_pred_luma_copy.s", + "common/arm64/ihevc_inter_pred_luma_copy_w16out.s", + "common/arm64/ihevc_inter_pred_luma_horz_w16out.s", + "common/arm64/ihevc_inter_pred_luma_vert_w16inp_w16out.s", + "common/arm64/ihevc_intra_pred_chroma_dc.s", + "common/arm64/ihevc_intra_pred_chroma_horz.s", + "common/arm64/ihevc_intra_pred_chroma_mode2.s", + "common/arm64/ihevc_intra_pred_chroma_mode_18_34.s", + "common/arm64/ihevc_intra_pred_chroma_mode_27_to_33.s", + "common/arm64/ihevc_intra_pred_chroma_mode_3_to_9.s", + "common/arm64/ihevc_intra_pred_chroma_planar.s", + "common/arm64/ihevc_intra_pred_chroma_ver.s", + "common/arm64/ihevc_intra_pred_filters_chroma_mode_11_to_17.s", + "common/arm64/ihevc_intra_pred_filters_chroma_mode_19_to_25.s", + "common/arm64/ihevc_intra_pred_filters_luma_mode_11_to_17.s", + "common/arm64/ihevc_intra_pred_filters_luma_mode_19_to_25.s", + "common/arm64/ihevc_intra_pred_luma_dc.s", + "common/arm64/ihevc_intra_pred_luma_horz.s", + "common/arm64/ihevc_intra_pred_luma_mode2.s", + "common/arm64/ihevc_intra_pred_luma_mode_18_34.s", + 
"common/arm64/ihevc_intra_pred_luma_mode_27_to_33.s", + "common/arm64/ihevc_intra_pred_luma_mode_3_to_9.s", + "common/arm64/ihevc_intra_pred_luma_planar.s", + "common/arm64/ihevc_intra_pred_luma_vert.s", + "common/arm64/ihevc_itrans_recon_16x16.s", + "common/arm64/ihevc_itrans_recon_32x32.s", + "common/arm64/ihevc_itrans_recon_4x4.s", + "common/arm64/ihevc_itrans_recon_4x4_ttype1.s", + "common/arm64/ihevc_itrans_recon_8x8.s", + "common/arm64/ihevc_mem_fns.s", + "common/arm64/ihevc_padding.s", "common/arm64/ihevc_sao_band_offset_chroma.s", + "common/arm64/ihevc_sao_band_offset_luma.s", "common/arm64/ihevc_sao_edge_offset_class0.s", "common/arm64/ihevc_sao_edge_offset_class0_chroma.s", "common/arm64/ihevc_sao_edge_offset_class1.s", @@ -162,156 +199,107 @@ cc_library_static { "common/arm64/ihevc_sao_edge_offset_class2_chroma.s", "common/arm64/ihevc_sao_edge_offset_class3.s", "common/arm64/ihevc_sao_edge_offset_class3_chroma.s", - "common/arm64/ihevc_inter_pred_luma_horz_w16out.s", - "common/arm64/ihevc_inter_pred_filters_luma_horz.s", - "common/arm64/ihevc_inter_pred_filters_luma_vert.s", - "common/arm64/ihevc_inter_pred_chroma_horz.s", - "common/arm64/ihevc_inter_pred_chroma_horz_w16out.s", - "common/arm64/ihevc_inter_pred_chroma_vert.s", - "common/arm64/ihevc_inter_pred_chroma_vert_w16out.s", - "common/arm64/ihevc_inter_pred_chroma_vert_w16inp.s", - "common/arm64/ihevc_inter_pred_chroma_vert_w16inp_w16out.s", - "common/arm64/ihevc_inter_pred_filters_luma_vert_w16inp.s", - "common/arm64/ihevc_inter_pred_filters_luma_vert_w16out.s", - "common/arm64/ihevc_inter_pred_luma_vert_w16inp_w16out.s", - "common/arm64/ihevc_inter_pred_luma_copy_w16out.s", - "common/arm64/ihevc_inter_pred_luma_copy.s", - "common/arm64/ihevc_inter_pred_chroma_copy.s", - "common/arm64/ihevc_inter_pred_chroma_copy_w16out.s", - "common/arm64/ihevc_itrans_recon_4x4_ttype1.s", - "common/arm64/ihevc_itrans_recon_4x4.s", - "common/arm64/ihevc_itrans_recon_8x8.s", - 
"common/arm64/ihevc_itrans_recon_16x16.s", - "common/arm64/ihevc_intra_pred_chroma_planar.s", - "common/arm64/ihevc_intra_pred_chroma_dc.s", - "common/arm64/ihevc_intra_pred_chroma_horz.s", - "common/arm64/ihevc_intra_pred_chroma_ver.s", - "common/arm64/ihevc_intra_pred_chroma_mode2.s", - "common/arm64/ihevc_intra_pred_chroma_mode_18_34.s", - "common/arm64/ihevc_intra_pred_filters_chroma_mode_11_to_17.s", - "common/arm64/ihevc_intra_pred_filters_chroma_mode_19_to_25.s", - "common/arm64/ihevc_intra_pred_chroma_mode_3_to_9.s", - "common/arm64/ihevc_intra_pred_chroma_mode_27_to_33.s", - "common/arm64/ihevc_intra_pred_luma_planar.s", - "common/arm64/ihevc_intra_pred_luma_horz.s", - "common/arm64/ihevc_intra_pred_luma_mode2.s", - "common/arm64/ihevc_intra_pred_luma_mode_27_to_33.s", - "common/arm64/ihevc_intra_pred_luma_mode_18_34.s", - "common/arm64/ihevc_intra_pred_luma_vert.s", - "common/arm64/ihevc_intra_pred_luma_dc.s", - "common/arm64/ihevc_intra_pred_filters_luma_mode_11_to_17.s", - "common/arm64/ihevc_intra_pred_filters_luma_mode_19_to_25.s", - "common/arm64/ihevc_intra_pred_luma_mode_3_to_9.s", - "common/arm64/ihevc_padding.s", - "decoder/arm64/ihevcd_itrans_recon_dc_luma.s", - "decoder/arm64/ihevcd_itrans_recon_dc_chroma.s", + "common/arm64/ihevc_weighted_pred_bi.s", + "common/arm64/ihevc_weighted_pred_bi_default.s", + "common/arm64/ihevc_weighted_pred_uni.s", + "decoder/arm/ihevcd_function_selector.c", + "decoder/arm/ihevcd_function_selector_noneon.c", "decoder/arm64/ihevcd_fmt_conv_420sp_to_420p.s", "decoder/arm64/ihevcd_fmt_conv_420sp_to_420sp.s", - "decoder/arm64/ihevcd_fmt_conv_420sp_to_rgba8888.s", + "decoder/arm64/ihevcd_function_selector_av8.c", + "decoder/arm64/ihevcd_itrans_recon_dc_chroma.s", + "decoder/arm64/ihevcd_itrans_recon_dc_luma.s", ], }, arm: { local_include_dirs: [ - "decoder/arm", "common/arm", + "decoder/arm", ], srcs: [ "decoder/arm/ihevcd_function_selector.c", "decoder/arm/ihevcd_function_selector_noneon.c", + 
"common/arm/ihevc_deblk_chroma_horz.s", + "common/arm/ihevc_deblk_chroma_vert.s", + "common/arm/ihevc_deblk_luma_horz.s", + "common/arm/ihevc_deblk_luma_vert.s", + "common/arm/ihevc_inter_pred_chroma_copy.s", + "common/arm/ihevc_inter_pred_chroma_copy_w16out.s", + "common/arm/ihevc_inter_pred_chroma_horz.s", + "common/arm/ihevc_inter_pred_chroma_horz_w16out.s", + "common/arm/ihevc_inter_pred_chroma_vert.s", + "common/arm/ihevc_inter_pred_chroma_vert_w16inp.s", + "common/arm/ihevc_inter_pred_chroma_vert_w16inp_w16out.s", + "common/arm/ihevc_inter_pred_chroma_vert_w16out.s", + "common/arm/ihevc_inter_pred_filters_luma_horz.s", + "common/arm/ihevc_inter_pred_filters_luma_vert.s", + "common/arm/ihevc_inter_pred_filters_luma_vert_w16inp.s", + "common/arm/ihevc_inter_pred_luma_copy.s", + "common/arm/ihevc_inter_pred_luma_copy_w16out.s", + "common/arm/ihevc_inter_pred_luma_horz_w16out.s", + "common/arm/ihevc_inter_pred_luma_vert_w16inp_w16out.s", + "common/arm/ihevc_intra_pred_chroma_dc.s", + "common/arm/ihevc_intra_pred_chroma_horz.s", + "common/arm/ihevc_intra_pred_chroma_mode2.s", + "common/arm/ihevc_intra_pred_chroma_mode_18_34.s", + "common/arm/ihevc_intra_pred_chroma_mode_27_to_33.s", + "common/arm/ihevc_intra_pred_chroma_mode_3_to_9.s", + "common/arm/ihevc_intra_pred_chroma_planar.s", + "common/arm/ihevc_intra_pred_chroma_ver.s", + "common/arm/ihevc_intra_pred_filters_chroma_mode_11_to_17.s", + "common/arm/ihevc_intra_pred_filters_chroma_mode_19_to_25.s", + "common/arm/ihevc_intra_pred_filters_luma_mode_11_to_17.s", + "common/arm/ihevc_intra_pred_filters_luma_mode_19_to_25.s", + "common/arm/ihevc_intra_pred_filters_neon_intr.c", + "common/arm/ihevc_intra_pred_luma_dc.s", + "common/arm/ihevc_intra_pred_luma_horz.s", + "common/arm/ihevc_intra_pred_luma_mode2.s", + "common/arm/ihevc_intra_pred_luma_mode_18_34.s", + "common/arm/ihevc_intra_pred_luma_mode_27_to_33.s", + "common/arm/ihevc_intra_pred_luma_mode_3_to_9.s", + "common/arm/ihevc_intra_pred_luma_planar.s", + 
"common/arm/ihevc_intra_pred_luma_vert.s", + "common/arm/ihevc_intra_ref_substitution_a9q.c", + "common/arm/ihevc_itrans_recon_16x16.s", + "common/arm/ihevc_itrans_recon_32x32.s", + "common/arm/ihevc_itrans_recon_4x4.s", + "common/arm/ihevc_itrans_recon_4x4_ttype1.s", + "common/arm/ihevc_itrans_recon_8x8.s", + "common/arm/ihevc_mem_fns.s", + "common/arm/ihevc_padding.s", + "common/arm/ihevc_sao_band_offset_chroma.s", + "common/arm/ihevc_sao_band_offset_luma.s", + "common/arm/ihevc_sao_edge_offset_class0.s", + "common/arm/ihevc_sao_edge_offset_class0_chroma.s", + "common/arm/ihevc_sao_edge_offset_class1.s", + "common/arm/ihevc_sao_edge_offset_class1_chroma.s", + "common/arm/ihevc_sao_edge_offset_class2.s", + "common/arm/ihevc_sao_edge_offset_class2_chroma.s", + "common/arm/ihevc_sao_edge_offset_class3.s", + "common/arm/ihevc_sao_edge_offset_class3_chroma.s", + "common/arm/ihevc_weighted_pred_bi.s", + "common/arm/ihevc_weighted_pred_bi_default.s", + "common/arm/ihevc_weighted_pred_neon_intr.c", + "common/arm/ihevc_weighted_pred_uni.s", + "decoder/arm/ihevcd_fmt_conv_420sp_to_420p.s", + "decoder/arm/ihevcd_fmt_conv_420sp_to_420sp.s", + "decoder/arm/ihevcd_function_selector_a9q.c", + "decoder/arm/ihevcd_itrans_recon_dc_chroma.s", + "decoder/arm/ihevcd_itrans_recon_dc_luma.s", ], cflags: [ + // Modules coded with neon intrinsics are not not included in Android Build. 
"-DDISABLE_NEONINTR", "-DARM", "-DARMGCC", "-fno-tree-vectorize", - - // These will be overriden by armv7_a_neon - "-DDISABLE_NEON", - "-DDEFAULT_ARCH=D_ARCH_ARM_NONEON", + "-DDEFAULT_ARCH=D_ARCH_ARM_A9Q", ], instruction_set: "arm", - - neon: { - srcs: [ - "decoder/arm/ihevcd_function_selector_a9q.c", - "common/arm/ihevc_intra_ref_substitution_a9q.c", - "common/arm/ihevc_intra_pred_filters_neon_intr.c", - "common/arm/ihevc_weighted_pred_neon_intr.c", - "common/arm/ihevc_mem_fns.s", - "common/arm/ihevc_itrans_recon_32x32.s", - "common/arm/ihevc_weighted_pred_bi_default.s", - "common/arm/ihevc_weighted_pred_bi.s", - "common/arm/ihevc_weighted_pred_uni.s", - "common/arm/ihevc_deblk_luma_horz.s", - "common/arm/ihevc_deblk_luma_vert.s", - "common/arm/ihevc_deblk_chroma_vert.s", - "common/arm/ihevc_deblk_chroma_horz.s", - "common/arm/ihevc_sao_band_offset_luma.s", - "common/arm/ihevc_sao_band_offset_chroma.s", - "common/arm/ihevc_sao_edge_offset_class0.s", - "common/arm/ihevc_sao_edge_offset_class0_chroma.s", - "common/arm/ihevc_sao_edge_offset_class1.s", - "common/arm/ihevc_sao_edge_offset_class1_chroma.s", - "common/arm/ihevc_sao_edge_offset_class2.s", - "common/arm/ihevc_sao_edge_offset_class2_chroma.s", - "common/arm/ihevc_sao_edge_offset_class3.s", - "common/arm/ihevc_sao_edge_offset_class3_chroma.s", - "common/arm/ihevc_inter_pred_luma_horz_w16out.s", - "common/arm/ihevc_inter_pred_filters_luma_horz.s", - "common/arm/ihevc_inter_pred_filters_luma_vert.s", - "common/arm/ihevc_inter_pred_chroma_horz.s", - "common/arm/ihevc_inter_pred_chroma_horz_w16out.s", - "common/arm/ihevc_inter_pred_chroma_vert.s", - "common/arm/ihevc_inter_pred_chroma_vert_w16out.s", - "common/arm/ihevc_inter_pred_chroma_vert_w16inp.s", - "common/arm/ihevc_inter_pred_chroma_vert_w16inp_w16out.s", - "common/arm/ihevc_inter_pred_filters_luma_vert_w16inp.s", - "common/arm/ihevc_inter_pred_luma_vert_w16inp_w16out.s", - "common/arm/ihevc_inter_pred_luma_copy_w16out.s", - 
"common/arm/ihevc_inter_pred_luma_copy.s", - "common/arm/ihevc_inter_pred_chroma_copy.s", - "common/arm/ihevc_inter_pred_chroma_copy_w16out.s", - "common/arm/ihevc_itrans_recon_4x4_ttype1.s", - "common/arm/ihevc_itrans_recon_4x4.s", - "common/arm/ihevc_itrans_recon_8x8.s", - "common/arm/ihevc_itrans_recon_16x16.s", - "common/arm/ihevc_intra_pred_chroma_planar.s", - "common/arm/ihevc_intra_pred_chroma_dc.s", - "common/arm/ihevc_intra_pred_chroma_horz.s", - "common/arm/ihevc_intra_pred_chroma_ver.s", - "common/arm/ihevc_intra_pred_chroma_mode2.s", - "common/arm/ihevc_intra_pred_chroma_mode_18_34.s", - "common/arm/ihevc_intra_pred_filters_chroma_mode_11_to_17.s", - "common/arm/ihevc_intra_pred_filters_chroma_mode_19_to_25.s", - "common/arm/ihevc_intra_pred_chroma_mode_3_to_9.s", - "common/arm/ihevc_intra_pred_chroma_mode_27_to_33.s", - "common/arm/ihevc_intra_pred_luma_planar.s", - "common/arm/ihevc_intra_pred_luma_horz.s", - "common/arm/ihevc_intra_pred_luma_mode2.s", - "common/arm/ihevc_intra_pred_luma_mode_27_to_33.s", - "common/arm/ihevc_intra_pred_luma_mode_18_34.s", - "common/arm/ihevc_intra_pred_luma_vert.s", - "common/arm/ihevc_intra_pred_luma_dc.s", - "common/arm/ihevc_intra_pred_filters_luma_mode_11_to_17.s", - "common/arm/ihevc_intra_pred_filters_luma_mode_19_to_25.s", - "common/arm/ihevc_intra_pred_luma_mode_3_to_9.s", - "common/arm/ihevc_padding.s", - "decoder/arm/ihevcd_itrans_recon_dc_luma.s", - "decoder/arm/ihevcd_itrans_recon_dc_chroma.s", - "decoder/arm/ihevcd_fmt_conv_420sp_to_420p.s", - "decoder/arm/ihevcd_fmt_conv_420sp_to_420sp.s", - "decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s", - ], - cflags: [ - "-UDISABLE_NEON", - "-UDEFAULT_ARCH", - "-DDEFAULT_ARCH=D_ARCH_ARM_A9Q", - ], - }, }, x86_64: { @@ -324,37 +312,37 @@ cc_library_static { ], local_include_dirs: [ - "decoder/x86", "common/x86", + "decoder/x86", ], srcs: [ - "decoder/x86/ihevcd_function_selector.c", - "decoder/x86/ihevcd_function_selector_generic.c", - 
"decoder/x86/ihevcd_function_selector_ssse3.c", - "decoder/x86/ihevcd_function_selector_sse42.c", - "common/x86/ihevc_inter_pred_filters_ssse3_intr.c", - "common/x86/ihevc_weighted_pred_ssse3_intr.c", - "common/x86/ihevc_intra_pred_filters_ssse3_intr.c", - "common/x86/ihevc_chroma_intra_pred_filters_ssse3_intr.c", - "common/x86/ihevc_itrans_recon_ssse3_intr.c", - "common/x86/ihevc_itrans_recon_16x16_ssse3_intr.c", - "common/x86/ihevc_itrans_recon_32x32_ssse3_intr.c", - "common/x86/ihevc_sao_ssse3_intr.c", - "common/x86/ihevc_deblk_ssse3_intr.c", - "common/x86/ihevc_padding_ssse3_intr.c", - "common/x86/ihevc_mem_fns_ssse3_intr.c", - "decoder/x86/ihevcd_fmt_conv_ssse3_intr.c", - "decoder/x86/ihevcd_it_rec_dc_ssse3_intr.c", - "common/x86/ihevc_inter_pred_filters_sse42_intr.c", - "common/x86/ihevc_weighted_pred_sse42_intr.c", - "common/x86/ihevc_intra_pred_filters_sse42_intr.c", - "common/x86/ihevc_chroma_intra_pred_filters_sse42_intr.c", - "common/x86/ihevc_itrans_recon_sse42_intr.c", "common/x86/ihevc_16x16_itrans_recon_sse42_intr.c", "common/x86/ihevc_32x32_itrans_recon_sse42_intr.c", - "decoder/x86/ihevcd_it_rec_dc_sse42_intr.c", + "common/x86/ihevc_chroma_intra_pred_filters_sse42_intr.c", + "common/x86/ihevc_chroma_intra_pred_filters_ssse3_intr.c", + "common/x86/ihevc_deblk_ssse3_intr.c", + "common/x86/ihevc_inter_pred_filters_sse42_intr.c", + "common/x86/ihevc_inter_pred_filters_ssse3_intr.c", + "common/x86/ihevc_intra_pred_filters_sse42_intr.c", + "common/x86/ihevc_intra_pred_filters_ssse3_intr.c", + "common/x86/ihevc_itrans_recon_16x16_ssse3_intr.c", + "common/x86/ihevc_itrans_recon_32x32_ssse3_intr.c", + "common/x86/ihevc_itrans_recon_sse42_intr.c", + "common/x86/ihevc_itrans_recon_ssse3_intr.c", + "common/x86/ihevc_mem_fns_ssse3_intr.c", + "common/x86/ihevc_padding_ssse3_intr.c", + "common/x86/ihevc_sao_ssse3_intr.c", "common/x86/ihevc_tables_x86_intr.c", + "common/x86/ihevc_weighted_pred_sse42_intr.c", + "common/x86/ihevc_weighted_pred_ssse3_intr.c", + 
"decoder/x86/ihevcd_fmt_conv_ssse3_intr.c", + "decoder/x86/ihevcd_function_selector.c", + "decoder/x86/ihevcd_function_selector_generic.c", + "decoder/x86/ihevcd_function_selector_sse42.c", + "decoder/x86/ihevcd_function_selector_ssse3.c", + "decoder/x86/ihevcd_it_rec_dc_sse42_intr.c", + "decoder/x86/ihevcd_it_rec_dc_ssse3_intr.c", ], }, @@ -368,37 +356,37 @@ cc_library_static { ], local_include_dirs: [ - "decoder/x86", "common/x86", + "decoder/x86", ], srcs: [ - "decoder/x86/ihevcd_function_selector.c", - "decoder/x86/ihevcd_function_selector_generic.c", - "decoder/x86/ihevcd_function_selector_ssse3.c", - "decoder/x86/ihevcd_function_selector_sse42.c", - "common/x86/ihevc_inter_pred_filters_ssse3_intr.c", - "common/x86/ihevc_weighted_pred_ssse3_intr.c", - "common/x86/ihevc_intra_pred_filters_ssse3_intr.c", - "common/x86/ihevc_chroma_intra_pred_filters_ssse3_intr.c", - "common/x86/ihevc_itrans_recon_ssse3_intr.c", - "common/x86/ihevc_itrans_recon_16x16_ssse3_intr.c", - "common/x86/ihevc_itrans_recon_32x32_ssse3_intr.c", - "common/x86/ihevc_sao_ssse3_intr.c", - "common/x86/ihevc_deblk_ssse3_intr.c", - "common/x86/ihevc_padding_ssse3_intr.c", - "common/x86/ihevc_mem_fns_ssse3_intr.c", - "decoder/x86/ihevcd_fmt_conv_ssse3_intr.c", - "decoder/x86/ihevcd_it_rec_dc_ssse3_intr.c", - "common/x86/ihevc_inter_pred_filters_sse42_intr.c", - "common/x86/ihevc_weighted_pred_sse42_intr.c", - "common/x86/ihevc_intra_pred_filters_sse42_intr.c", - "common/x86/ihevc_chroma_intra_pred_filters_sse42_intr.c", - "common/x86/ihevc_itrans_recon_sse42_intr.c", "common/x86/ihevc_16x16_itrans_recon_sse42_intr.c", "common/x86/ihevc_32x32_itrans_recon_sse42_intr.c", - "decoder/x86/ihevcd_it_rec_dc_sse42_intr.c", + "common/x86/ihevc_chroma_intra_pred_filters_sse42_intr.c", + "common/x86/ihevc_chroma_intra_pred_filters_ssse3_intr.c", + "common/x86/ihevc_deblk_ssse3_intr.c", + "common/x86/ihevc_inter_pred_filters_sse42_intr.c", + "common/x86/ihevc_inter_pred_filters_ssse3_intr.c", + 
"common/x86/ihevc_intra_pred_filters_sse42_intr.c", + "common/x86/ihevc_intra_pred_filters_ssse3_intr.c", + "common/x86/ihevc_itrans_recon_16x16_ssse3_intr.c", + "common/x86/ihevc_itrans_recon_32x32_ssse3_intr.c", + "common/x86/ihevc_itrans_recon_sse42_intr.c", + "common/x86/ihevc_itrans_recon_ssse3_intr.c", + "common/x86/ihevc_mem_fns_ssse3_intr.c", + "common/x86/ihevc_padding_ssse3_intr.c", + "common/x86/ihevc_sao_ssse3_intr.c", "common/x86/ihevc_tables_x86_intr.c", + "common/x86/ihevc_weighted_pred_sse42_intr.c", + "common/x86/ihevc_weighted_pred_ssse3_intr.c", + "decoder/x86/ihevcd_fmt_conv_ssse3_intr.c", + "decoder/x86/ihevcd_function_selector.c", + "decoder/x86/ihevcd_function_selector_generic.c", + "decoder/x86/ihevcd_function_selector_sse42.c", + "decoder/x86/ihevcd_function_selector_ssse3.c", + "decoder/x86/ihevcd_it_rec_dc_sse42_intr.c", + "decoder/x86/ihevcd_it_rec_dc_ssse3_intr.c", ], }, riscv64: { @@ -418,7 +406,7 @@ cc_library_static { misc_undefined: ["bounds"], // Enable CFI if this becomes a shared library. 
cfi: true, - config: { + config: { cfi_assembly_support: true, }, blocklist: "libhevc_blocklist.txt", @@ -431,7 +419,7 @@ cc_library_static { min_sdk_version: "29", } -cc_test { +cc_binary { name: "hevcdec", host_supported: true, cflags: [ @@ -442,7 +430,6 @@ cc_test { "-Wall", "-Werror", ], - gtest: false, srcs: ["test/decoder/main.c"], static_libs: ["libhevcdec"], target: { @@ -454,16 +441,17 @@ cc_test { cc_library_static { name: "libhevcenc", + defaults: ["no_bti"], vendor_available: true, host_supported: true, cflags: [ "-DENABLE_MAIN_REXT_PROFILE", + "-DDISABLE_SEI", "-fPIC", "-O3", "-Wall", "-Wno-unused-variable", "-Wno-unused-parameter", - "-Wno-switch", ], export_include_dirs: [ @@ -475,6 +463,7 @@ cc_library_static { "common/ihevc_cabac_tables.c", "common/ihevc_chroma_intra_pred_filters.c", "common/ihevc_chroma_itrans_recon.c", + "common/ihevc_chroma_itrans_recon_32x32.c", "common/ihevc_chroma_itrans_recon_16x16.c", "common/ihevc_chroma_itrans_recon_8x8.c", "common/ihevc_common_tables.c", @@ -591,31 +580,18 @@ cc_library_static { arm64: { local_include_dirs: [ - "encoder/arm", "common/arm", "common/arm64", + "encoder/arm", ], srcs: [ - "encoder/arm/ihevce_coarse_layer_sad_neon.c", - "encoder/arm/ihevce_common_utils_neon.c", - "encoder/arm/ihevce_copy_neon.c", - "encoder/arm/ihevce_had_compute_neon.c", - "encoder/arm/ihevce_hme_utils_neon.c", - "encoder/arm/ihevce_itrans_recon_neon.c", - "encoder/arm/ihevce_me_neon.c", - "encoder/arm/ihevce_sad_compute_neon.c", - "encoder/arm/ihevce_scale_by_2_neon.c", - "encoder/arm/ihevce_scan_coeffs_neon.c", - "encoder/arm/ihevce_ssd_and_sad_calculator_neon.c", - "encoder/arm/ihevce_ssd_calculator_neon.c", - "encoder/arm/ihevce_subpel_neon.c", + "common/arm/ihevc_intra_pred_filters_neon_intr.c", + "common/arm/ihevc_intra_ref_substitution_a9q.c", + "common/arm/ihevc_quant_iquant_ssd_neon_intr.c", "common/arm/ihevc_resi_trans_neon.c", "common/arm/ihevc_resi_trans_neon_32x32.c", - 
"common/arm/ihevc_quant_iquant_ssd_neon_intr.c", - "common/arm/ihevc_intra_pred_filters_neon_intr.c", "common/arm/ihevc_weighted_pred_neon_intr.c", - "common/arm/ihevc_intra_ref_substitution_a9q.c", "common/arm64/ihevc_deblk_chroma_horz.s", "common/arm64/ihevc_deblk_chroma_vert.s", "common/arm64/ihevc_deblk_luma_horz.s", @@ -676,6 +652,19 @@ cc_library_static { "common/arm64/ihevc_weighted_pred_bi.s", "common/arm64/ihevc_weighted_pred_bi_default.s", "common/arm64/ihevc_weighted_pred_uni.s", + "encoder/arm/ihevce_coarse_layer_sad_neon.c", + "encoder/arm/ihevce_common_utils_neon.c", + "encoder/arm/ihevce_copy_neon.c", + "encoder/arm/ihevce_had_compute_neon.c", + "encoder/arm/ihevce_hme_utils_neon.c", + "encoder/arm/ihevce_itrans_recon_neon.c", + "encoder/arm/ihevce_me_neon.c", + "encoder/arm/ihevce_sad_compute_neon.c", + "encoder/arm/ihevce_scale_by_2_neon.c", + "encoder/arm/ihevce_scan_coeffs_neon.c", + "encoder/arm/ihevce_ssd_and_sad_calculator_neon.c", + "encoder/arm/ihevce_ssd_calculator_neon.c", + "encoder/arm/ihevce_subpel_neon.c", ], cflags: [ @@ -687,101 +676,98 @@ cc_library_static { arm: { local_include_dirs: [ - "encoder/arm", "common/arm", + "encoder/arm", ], + srcs: [ + "common/arm/ihevc_deblk_chroma_horz.s", + "common/arm/ihevc_deblk_chroma_vert.s", + "common/arm/ihevc_deblk_luma_horz.s", + "common/arm/ihevc_deblk_luma_vert.s", + "common/arm/ihevc_inter_pred_chroma_copy.s", + "common/arm/ihevc_inter_pred_chroma_copy_w16out.s", + "common/arm/ihevc_inter_pred_chroma_horz.s", + "common/arm/ihevc_inter_pred_chroma_horz_w16out.s", + "common/arm/ihevc_inter_pred_chroma_vert.s", + "common/arm/ihevc_inter_pred_chroma_vert_w16inp.s", + "common/arm/ihevc_inter_pred_chroma_vert_w16inp_w16out.s", + "common/arm/ihevc_inter_pred_chroma_vert_w16out.s", + "common/arm/ihevc_inter_pred_filters_luma_horz.s", + "common/arm/ihevc_inter_pred_filters_luma_vert.s", + "common/arm/ihevc_inter_pred_filters_luma_vert_w16inp.s", + "common/arm/ihevc_inter_pred_luma_copy.s", + 
"common/arm/ihevc_inter_pred_luma_copy_w16out.s", + "common/arm/ihevc_inter_pred_luma_horz_w16out.s", + "common/arm/ihevc_inter_pred_luma_vert_w16inp_w16out.s", + "common/arm/ihevc_intra_pred_chroma_dc.s", + "common/arm/ihevc_intra_pred_chroma_horz.s", + "common/arm/ihevc_intra_pred_chroma_mode2.s", + "common/arm/ihevc_intra_pred_chroma_mode_18_34.s", + "common/arm/ihevc_intra_pred_chroma_mode_27_to_33.s", + "common/arm/ihevc_intra_pred_chroma_mode_3_to_9.s", + "common/arm/ihevc_intra_pred_chroma_planar.s", + "common/arm/ihevc_intra_pred_chroma_ver.s", + "common/arm/ihevc_intra_pred_filters_chroma_mode_11_to_17.s", + "common/arm/ihevc_intra_pred_filters_chroma_mode_19_to_25.s", + "common/arm/ihevc_intra_pred_filters_luma_mode_11_to_17.s", + "common/arm/ihevc_intra_pred_filters_luma_mode_19_to_25.s", + "common/arm/ihevc_intra_pred_filters_neon_intr.c", + "common/arm/ihevc_intra_pred_luma_dc.s", + "common/arm/ihevc_intra_pred_luma_horz.s", + "common/arm/ihevc_intra_pred_luma_mode2.s", + "common/arm/ihevc_intra_pred_luma_mode_18_34.s", + "common/arm/ihevc_intra_pred_luma_mode_27_to_33.s", + "common/arm/ihevc_intra_pred_luma_mode_3_to_9.s", + "common/arm/ihevc_intra_pred_luma_planar.s", + "common/arm/ihevc_intra_pred_luma_vert.s", + "common/arm/ihevc_intra_ref_substitution_a9q.c", + "common/arm/ihevc_itrans_recon_16x16.s", + "common/arm/ihevc_itrans_recon_32x32.s", + "common/arm/ihevc_itrans_recon_4x4.s", + "common/arm/ihevc_itrans_recon_4x4_ttype1.s", + "common/arm/ihevc_itrans_recon_8x8.s", + "common/arm/ihevc_mem_fns.s", + "common/arm/ihevc_padding.s", + "common/arm/ihevc_quant_iquant_ssd_neon_intr.c", + "common/arm/ihevc_resi_trans.s", + "common/arm/ihevc_resi_trans_32x32_a9q.s", + "common/arm/ihevc_resi_trans_neon.c", + "common/arm/ihevc_resi_trans_neon_32x32.c", + "common/arm/ihevc_sao_band_offset_chroma.s", + "common/arm/ihevc_sao_band_offset_luma.s", + "common/arm/ihevc_sao_edge_offset_class0.s", + "common/arm/ihevc_sao_edge_offset_class0_chroma.s", + 
"common/arm/ihevc_sao_edge_offset_class1.s", + "common/arm/ihevc_sao_edge_offset_class1_chroma.s", + "common/arm/ihevc_sao_edge_offset_class2.s", + "common/arm/ihevc_sao_edge_offset_class2_chroma.s", + "common/arm/ihevc_sao_edge_offset_class3.s", + "common/arm/ihevc_sao_edge_offset_class3_chroma.s", + "common/arm/ihevc_weighted_pred_bi.s", + "common/arm/ihevc_weighted_pred_bi_default.s", + "common/arm/ihevc_weighted_pred_neon_intr.c", + "common/arm/ihevc_weighted_pred_uni.s", + "encoder/arm/ihevce_coarse_layer_sad_neon.c", + "encoder/arm/ihevce_common_utils_neon.c", + "encoder/arm/ihevce_copy_neon.c", + "encoder/arm/ihevce_had_compute_neon.c", + "encoder/arm/ihevce_hme_utils_neon.c", + "encoder/arm/ihevce_itrans_recon_neon.c", + "encoder/arm/ihevce_me_neon.c", + "encoder/arm/ihevce_sad_compute_neon.c", + "encoder/arm/ihevce_scale_by_2_neon.c", + "encoder/arm/ihevce_scan_coeffs_neon.c", + "encoder/arm/ihevce_ssd_and_sad_calculator_neon.c", + "encoder/arm/ihevce_ssd_calculator_neon.c", + "encoder/arm/ihevce_subpel_neon.c", + ], + + cflags: [ + "-DENABLE_NEON", + "-DARM", + ], instruction_set: "arm", - - neon: { - srcs: [ - "encoder/arm/ihevce_coarse_layer_sad_neon.c", - "encoder/arm/ihevce_common_utils_neon.c", - "encoder/arm/ihevce_copy_neon.c", - "encoder/arm/ihevce_had_compute_neon.c", - "encoder/arm/ihevce_hme_utils_neon.c", - "encoder/arm/ihevce_itrans_recon_neon.c", - "encoder/arm/ihevce_me_neon.c", - "encoder/arm/ihevce_sad_compute_neon.c", - "encoder/arm/ihevce_scale_by_2_neon.c", - "encoder/arm/ihevce_scan_coeffs_neon.c", - "encoder/arm/ihevce_ssd_and_sad_calculator_neon.c", - "encoder/arm/ihevce_ssd_calculator_neon.c", - "encoder/arm/ihevce_subpel_neon.c", - "common/arm/ihevc_resi_trans_neon.c", - "common/arm/ihevc_resi_trans_neon_32x32.c", - "common/arm/ihevc_quant_iquant_ssd_neon_intr.c", - "common/arm/ihevc_intra_pred_filters_neon_intr.c", - "common/arm/ihevc_weighted_pred_neon_intr.c", - "common/arm/ihevc_intra_ref_substitution_a9q.c", - 
"common/arm/ihevc_deblk_chroma_horz.s", - "common/arm/ihevc_deblk_chroma_vert.s", - "common/arm/ihevc_deblk_luma_horz.s", - "common/arm/ihevc_deblk_luma_vert.s", - "common/arm/ihevc_inter_pred_chroma_copy.s", - "common/arm/ihevc_inter_pred_chroma_copy_w16out.s", - "common/arm/ihevc_inter_pred_chroma_horz.s", - "common/arm/ihevc_inter_pred_chroma_horz_w16out.s", - "common/arm/ihevc_inter_pred_chroma_vert.s", - "common/arm/ihevc_inter_pred_chroma_vert_w16inp.s", - "common/arm/ihevc_inter_pred_chroma_vert_w16inp_w16out.s", - "common/arm/ihevc_inter_pred_chroma_vert_w16out.s", - "common/arm/ihevc_inter_pred_filters_luma_horz.s", - "common/arm/ihevc_inter_pred_filters_luma_vert.s", - "common/arm/ihevc_inter_pred_filters_luma_vert_w16inp.s", - "common/arm/ihevc_inter_pred_luma_copy.s", - "common/arm/ihevc_inter_pred_luma_copy_w16out.s", - "common/arm/ihevc_inter_pred_luma_horz_w16out.s", - "common/arm/ihevc_inter_pred_luma_vert_w16inp_w16out.s", - "common/arm/ihevc_intra_pred_chroma_dc.s", - "common/arm/ihevc_intra_pred_chroma_horz.s", - "common/arm/ihevc_intra_pred_chroma_mode2.s", - "common/arm/ihevc_intra_pred_chroma_mode_18_34.s", - "common/arm/ihevc_intra_pred_chroma_mode_27_to_33.s", - "common/arm/ihevc_intra_pred_chroma_mode_3_to_9.s", - "common/arm/ihevc_intra_pred_chroma_planar.s", - "common/arm/ihevc_intra_pred_chroma_ver.s", - "common/arm/ihevc_intra_pred_filters_chroma_mode_11_to_17.s", - "common/arm/ihevc_intra_pred_filters_chroma_mode_19_to_25.s", - "common/arm/ihevc_intra_pred_filters_luma_mode_11_to_17.s", - "common/arm/ihevc_intra_pred_filters_luma_mode_19_to_25.s", - "common/arm/ihevc_intra_pred_luma_dc.s", - "common/arm/ihevc_intra_pred_luma_horz.s", - "common/arm/ihevc_intra_pred_luma_mode2.s", - "common/arm/ihevc_intra_pred_luma_mode_18_34.s", - "common/arm/ihevc_intra_pred_luma_mode_27_to_33.s", - "common/arm/ihevc_intra_pred_luma_mode_3_to_9.s", - "common/arm/ihevc_intra_pred_luma_planar.s", - "common/arm/ihevc_intra_pred_luma_vert.s", - 
"common/arm/ihevc_itrans_recon_16x16.s", - "common/arm/ihevc_itrans_recon_32x32.s", - "common/arm/ihevc_itrans_recon_4x4.s", - "common/arm/ihevc_itrans_recon_4x4_ttype1.s", - "common/arm/ihevc_itrans_recon_8x8.s", - "common/arm/ihevc_resi_trans.s", - "common/arm/ihevc_resi_trans_32x32_a9q.s", - "common/arm/ihevc_mem_fns.s", - "common/arm/ihevc_padding.s", - "common/arm/ihevc_sao_band_offset_chroma.s", - "common/arm/ihevc_sao_band_offset_luma.s", - "common/arm/ihevc_sao_edge_offset_class0.s", - "common/arm/ihevc_sao_edge_offset_class0_chroma.s", - "common/arm/ihevc_sao_edge_offset_class1.s", - "common/arm/ihevc_sao_edge_offset_class1_chroma.s", - "common/arm/ihevc_sao_edge_offset_class2.s", - "common/arm/ihevc_sao_edge_offset_class2_chroma.s", - "common/arm/ihevc_sao_edge_offset_class3.s", - "common/arm/ihevc_sao_edge_offset_class3_chroma.s", - "common/arm/ihevc_weighted_pred_bi_default.s", - "common/arm/ihevc_weighted_pred_bi.s", - "common/arm/ihevc_weighted_pred_uni.s", - ], - - cflags: [ - "-DENABLE_NEON", - "-DARM", - ], - }, }, x86_64: { @@ -807,7 +793,7 @@ cc_library_static { misc_undefined: ["bounds"], // Enable CFI if this becomes a shared library. 
cfi: true, - config: { + config: { cfi_assembly_support: true, }, blocklist: "libhevc_blocklist.txt", @@ -820,7 +806,7 @@ cc_library_static { min_sdk_version: "29", } -cc_test { +cc_binary { name: "hevcenc", host_supported: true, cflags: [ @@ -829,7 +815,6 @@ cc_test { "-Wall", "-Werror", ], - gtest: false, srcs: ["test/encoder/main.c"], static_libs: ["libhevcenc"], sanitize: { diff --git a/CMakeLists.txt b/CMakeLists.txt index 590f117..5fe0e2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,17 @@ cmake_minimum_required(VERSION 3.9.1) project(libhevc C CXX) -enable_language(ASM) + +if(NOT DEFINED SYSTEM_NAME) + set(SYSTEM_NAME ${CMAKE_HOST_SYSTEM_NAME}) +endif() + +if(NOT DEFINED SYSTEM_PROCESSOR) + set(SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}) +endif() + +if(NOT "${SYSTEM_NAME}" STREQUAL "Darwin") + enable_language(ASM) +endif() set(HEVC_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") set(HEVC_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}") @@ -31,6 +42,7 @@ endif() include("${HEVC_ROOT}/cmake/utils.cmake") libhevc_add_compile_options() +libhevc_add_gtest() libhevc_add_definitions() libhevc_set_link_libraries() @@ -43,3 +55,5 @@ include("${HEVC_ROOT}/test/encoder/hevcenc.cmake") include("${HEVC_ROOT}/fuzzer/hevc_dec_fuzzer.cmake") include("${HEVC_ROOT}/fuzzer/hevc_enc_fuzzer.cmake") + +include("${HEVC_ROOT}/tests/common/common.cmake") diff --git a/OWNERS b/OWNERS index e234552..571fe48 100644 --- a/OWNERS +++ b/OWNERS @@ -1,3 +1,4 @@ # owners for external/libhevc +include platform/frameworks/av:/media/janitors/avic_OWNERS include platform/frameworks/av:/media/janitors/codec_OWNERS essick@google.com diff --git a/README.md b/README.md index a9e5758..ad3f5b6 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ Supports: - aarch32/aarch64 on Linux. - aarch32/aarch64 on Android. - x86_32/x86_64 on Linux. +- aarch64 on Mac. +- x86_64 on Mac. ## Native Builds Use the following commands for building on the target machine @@ -51,3 +53,29 @@ $ make $ cmake .. 
-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch32_toolchain.cmake $ make ``` + +### Building for android +NOTE: This assumes that you are building on a machine that has + [Android NDK](https://developer.android.com/ndk/downloads). + +``` +$ cd external/libhevc +$ mkdir build +$ cd build +``` + +#### Armv7 (32-bit) + + cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/android_toolchain.cmake\ + -DHEVC_ANDROID_NDK_PATH=/opt/android-ndk-r26d/\ + -DANDROID_ABI=armeabi-v7a\ + -DANDROID_PLATFORM=android-23 ../ + make + +#### Armv8 (64-bit) + + cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/android_toolchain.cmake\ + -DHEVC_ANDROID_NDK_PATH=/opt/android-ndk-r26d/\ + -DANDROID_ABI=arm64-v8a\ + -DANDROID_PLATFORM=android-23 ../ + make diff --git a/cmake/toolchains/aarch32_toolchain.cmake b/cmake/toolchains/aarch32_toolchain.cmake index 5fabde2..4939299 100644 --- a/cmake/toolchains/aarch32_toolchain.cmake +++ b/cmake/toolchains/aarch32_toolchain.cmake @@ -1,7 +1,10 @@ -set(CMAKE_SYSTEM_NAME Linux) -set(CMAKE_SYSTEM_PROCESSOR aarch32) +set(SYSTEM_NAME Linux) +set(SYSTEM_PROCESSOR aarch32) # Modify these variables with paths to appropriate compilers that can produce # armv7 targets set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc) set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++) + +# Build all binaries as static, so that they can be run using qemu +set(CMAKE_EXE_LINKER_FLAGS "-static") diff --git a/cmake/toolchains/aarch64_toolchain.cmake b/cmake/toolchains/aarch64_toolchain.cmake index 5efbf77..c3fc290 100644 --- a/cmake/toolchains/aarch64_toolchain.cmake +++ b/cmake/toolchains/aarch64_toolchain.cmake @@ -1,5 +1,5 @@ -set(CMAKE_SYSTEM_NAME Linux) -set(CMAKE_SYSTEM_PROCESSOR aarch64) +set(SYSTEM_NAME Linux) +set(SYSTEM_PROCESSOR aarch64) # Modify these variables with paths to appropriate compilers that can produce # armv8 targets @@ -11,3 +11,6 @@ set(CMAKE_C_COMPILER_AR set(CMAKE_CXX_COMPILER_AR aarch64-linux-gnu-gcc-ar CACHE FILEPATH "Archiver") + +# Build all binaries as 
static, so that they can be run using qemu +set(CMAKE_EXE_LINKER_FLAGS "-static") diff --git a/cmake/toolchains/android_toolchain.cmake b/cmake/toolchains/android_toolchain.cmake new file mode 100644 index 0000000..e661b8a --- /dev/null +++ b/cmake/toolchains/android_toolchain.cmake @@ -0,0 +1,34 @@ +set(SYSTEM_NAME Android) +set(CMAKE_SYSTEM_NAME Android) + +if(NOT ANDROID_PLATFORM) + set(ANDROID_PLATFORM android-23) +endif() + +# Choose target architecture with: +# -DANDROID_ABI={armeabi-v7a, arm64-v8a, x86, x86_64} +if(NOT ANDROID_ABI) + set(ANDROID_ABI arm64-v8a) +endif() + +if(ANDROID_ABI MATCHES "^armeabi") + set(SYSTEM_PROCESSOR aarch32) +else() + set(SYSTEM_PROCESSOR aarch64) +endif() + +# Toolchain files don't have access to cached variables: +# https://gitlab.kitware.com/cmake/cmake/issues/16170. Set an intermediate +# environment variable when loaded the first time. +if(HEVC_ANDROID_NDK_PATH) + set(ENV{HEVC_ANDROID_NDK_PATH} "${HEVC_ANDROID_NDK_PATH}") +else() + set(HEVC_ANDROID_NDK_PATH "$ENV{HEVC_ANDROID_NDK_PATH}") +endif() + +if(NOT HEVC_ANDROID_NDK_PATH) + message(FATAL_ERROR "HEVC_ANDROID_NDK_PATH not set.") + return() +endif() + +include("${HEVC_ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake") \ No newline at end of file diff --git a/cmake/utils.cmake b/cmake/utils.cmake index e8b97ac..f921656 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -2,9 +2,9 @@ include(CheckCXXCompilerFlag) # Adds compiler options for all targets function(libhevc_add_compile_options) - if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") + if("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "arm64") add_compile_options(-march=armv8-a) - elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch32") + elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32") add_compile_options(-march=armv7-a -mfpu=neon) else() add_compile_options(-msse4.2 -mno-avx) @@ -32,9 +32,15 @@ endfunction() # Adds defintions for all targets function(libhevc_add_definitions) - 
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") + if("${SYSTEM_NAME}" STREQUAL "Darwin") + if("${SYSTEM_PROCESSOR}" STREQUAL "arm64") + add_definitions(-DARMV8 -DDARWIN -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC) + else() + add_definitions(-DX86 -DDARWIN -DDISABLE_AVX2 -DDEFAULT_ARCH=D_ARCH_X86_GENERIC) + endif() + elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch64") add_definitions(-DARMV8 -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC -DENABLE_NEON) - elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch32") + elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32") add_definitions(-DARMV7 -DDEFAULT_ARCH=D_ARCH_ARM_A9Q -DENABLE_NEON -DDISABLE_NEONINTR) else() @@ -112,3 +118,35 @@ endfunction() function(libhevc_add_fuzzer NAME LIB) libhevc_add_executable(${NAME} ${LIB} FUZZER 1 ${ARGV}) endfunction() + +# Adds GoogleTest and Threads dependency +function(libhevc_add_gtest) + include(FetchContent) + FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip + ) + # For Windows: Prevent overriding the parent project's compiler/linker settings + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + FetchContent_MakeAvailable(googletest) +endfunction() + +# cmake-format: off +# Adds a target for a gtest executable +# +# Arguments: +# NAME: Name of the executable +# +# Optional Arguments: +# SOURCES: Additional source files +# cmake-format: on +function(libhevc_add_gtest_executable NAME) + set(multi_value_args SOURCES) + cmake_parse_arguments(ARG "" "" "${multi_value_args}" ${ARGN}) + + libhevc_add_executable( + ${NAME} libhevcdec + SOURCES ${HEVC_ROOT}/tests/common/func_selector.cc + ${HEVC_ROOT}/tests/common/tests_common.cc ${ARG_SOURCES} + LIBS GTest::gtest_main) +endfunction() diff --git a/common/arm/ihevc_intra_pred_filters_neon_intr.c b/common/arm/ihevc_intra_pred_filters_neon_intr.c index 190cccc..11749d1 100644 --- a/common/arm/ihevc_intra_pred_filters_neon_intr.c +++ b/common/arm/ihevc_intra_pred_filters_neon_intr.c @@ 
-438,6 +438,11 @@ void ihevc_intra_pred_luma_ref_substitution_neonintr(UWORD8 *pu1_top_left, * @param[in] mode * integer intraprediction mode * + * @param[in] intra_smoothing_flags + * integer bit 3 indicates if intra smoothing is enabled/disabled + * unconditionally. this is applicable to frext profiles only + * bit 0 indicates strong intra smoothing enabled/disabled + * * @returns * * @remarks @@ -451,7 +456,7 @@ void ihevc_intra_pred_ref_filtering_neonintr(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst, WORD32 mode, - WORD32 strong_intra_smoothing_enable_flag) + WORD32 intra_smoothing_flags) { WORD32 filter_flag; WORD32 i = 0; @@ -475,10 +480,12 @@ void ihevc_intra_pred_ref_filtering_neonintr(UWORD8 *pu1_src, WORD32 abs_cond_left_flag = 0; WORD32 abs_cond_top_flag = 0; WORD32 dc_val = 1 << (BIT_DEPTH - 5); + WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3); + WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1; + shift_res = vdup_n_u8(0); - - filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)); - + filter_flag = intra_smoothing_disabled ? 
+ 0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2))); if(0 == filter_flag) { if(pu1_src == pu1_dst) diff --git a/common/arm/ihevc_intra_ref_substitution_a9q.c b/common/arm/ihevc_intra_ref_substitution_a9q.c index fcdf0f2..a7a3ee8 100644 --- a/common/arm/ihevc_intra_ref_substitution_a9q.c +++ b/common/arm/ihevc_intra_ref_substitution_a9q.c @@ -103,7 +103,8 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, WORD32 nt, WORD32 nbr_flags, UWORD8 *pu1_dst, - WORD32 dst_strd) + WORD32 dst_strd, + WORD32 chroma_format_idc) { UWORD8 pu1_ref_u, pu1_ref_v; WORD32 dc_val, i, j; @@ -180,7 +181,7 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, // U-V interleaved Top-top right samples } - if(nt == 4) + if(nt == 4 || (nt == 8 && chroma_format_idc == CHROMA_FMT_IDC_YUV444)) { /* 1 bit extraction for all the neighboring blocks */ tp_left = (nbr_flags & 0x10000) >> 16; @@ -248,8 +249,9 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, } } - else if(nt == 8) + else if(nt == 8 || (nt == 16 && chroma_format_idc == CHROMA_FMT_IDC_YUV444)) { + WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 
2 : 1; WORD32 nbr_flags_temp = 0; nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4) + ((nbr_flags & 0x300) >> 4) @@ -259,16 +261,16 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/ /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */ { - nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */ - if(nbr_id_from_bl == 32) - nbr_id_from_bl = 16; - if(nbr_id_from_bl == 16) + nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * (4 * sub_sample); /* for bottom left and left */ + if(nbr_id_from_bl == 32 * sub_sample) + nbr_id_from_bl = 16 * sub_sample; + if(nbr_id_from_bl == 16 * sub_sample) { /* for top left : 1 pel per nbr bit */ if(!((nbr_flags_temp >> 8) & 0x1)) { nbr_id_from_bl++; - nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right; 8 pels per nbr bit */ + nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4 * sub_sample; /* top and top right; 8 pels per nbr bit */ } } @@ -287,14 +289,14 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, } /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */ - while(nbr_id_from_bl < ((T8C_4NT)+1)) + while(nbr_id_from_bl < ((T8C_4NT * sub_sample)+1)) { /* To Obtain the next unavailable idx flag after reverse neighbor substitution */ /* Divide by 8 to obtain the original index */ - frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/ + frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 
3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/ /* The Top-left flag is at the last bit location of nbr_flags*/ - if(nbr_id_from_bl == (T8C_4NT / 2)) + if(nbr_id_from_bl == (T8C_4NT * sub_sample / 2)) { get_bits = GET_BIT(nbr_flags_temp, 8); @@ -313,22 +315,23 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, UWORD16 *pu2_dst; /* 8 pel substitution (other than TL) */ pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2]; - ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4); + ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample); } } - nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4; + nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT * sub_sample / 2)) ? 1 : 4 * sub_sample; } } - else if(nt == 16) + else if(nt == 16 || (nt == 32 && chroma_format_idc == CHROMA_FMT_IDC_YUV444)) { + WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1; /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/ /* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */ { - nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */ + nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4 * sub_sample; /* for bottom left and left */ - if(nbr_id_from_bl == 32) + if(nbr_id_from_bl == 32 * sub_sample) { /* for top left : 1 pel per nbr bit */ if(!((nbr_flags >> 16) & 0x1)) @@ -336,7 +339,7 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, /* top left not available */ nbr_id_from_bl++; /* top and top right; 4 pels per nbr bit */ - nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4; + nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4 * sub_sample; } } /* Reverse Substitution Process*/ @@ -354,14 +357,14 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, } 
/* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */ - while(nbr_id_from_bl < ((T16C_4NT)+1)) + while(nbr_id_from_bl < ((T16C_4NT * sub_sample)+1)) { /* To Obtain the next unavailable idx flag after reverse neighbor substitution */ /* Devide by 4 to obtain the original index */ - frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/ + frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/ /* The Top-left flag is at the last bit location of nbr_flags*/ - if(nbr_id_from_bl == (T16C_4NT / 2)) + if(nbr_id_from_bl == (T16C_4NT * sub_sample / 2)) { get_bits = GET_BIT(nbr_flags, 16); /* only pel substitution for TL */ @@ -379,11 +382,11 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left, UWORD16 *pu2_dst; /* 4 pel substitution (other than TL) */ pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2]; - ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4); + ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample); } } - nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4; + nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT * sub_sample / 2)) ? 
1 : 4 * sub_sample; } } } diff --git a/common/common.cmake b/common/common.cmake index 839bd78..5a926cc 100644 --- a/common/common.cmake +++ b/common/common.cmake @@ -18,6 +18,7 @@ list( "${HEVC_ROOT}/common/ihevc_trans_tables.c" "${HEVC_ROOT}/common/ihevc_recon.c" "${HEVC_ROOT}/common/ihevc_itrans.c" + "${HEVC_ROOT}/common/ihevc_itrans_res.c" "${HEVC_ROOT}/common/ihevc_itrans_recon.c" "${HEVC_ROOT}/common/ihevc_iquant_recon.c" "${HEVC_ROOT}/common/ihevc_iquant_itrans_recon.c" @@ -29,6 +30,7 @@ list( "${HEVC_ROOT}/common/ihevc_chroma_iquant_itrans_recon.c" "${HEVC_ROOT}/common/ihevc_chroma_recon.c" "${HEVC_ROOT}/common/ihevc_chroma_itrans_recon_16x16.c" + "${HEVC_ROOT}/common/ihevc_chroma_itrans_recon_32x32.c" "${HEVC_ROOT}/common/ihevc_chroma_itrans_recon_8x8.c" "${HEVC_ROOT}/common/ihevc_buf_mgr.c" "${HEVC_ROOT}/common/ihevc_disp_mgr.c" @@ -62,7 +64,7 @@ list( include_directories(${HEVC_ROOT}/common) # arm/x86 sources -if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") +if("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "arm64") list( APPEND LIBHEVC_COMMON_ASMS @@ -135,7 +137,7 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") "${HEVC_ROOT}/common/arm64/ihevc_weighted_pred_uni.s") include_directories(${HEVC_ROOT}/common/arm64 ${HEVC_ROOT}/common/arm) -elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32") +elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32") list( APPEND LIBHEVC_COMMON_ASMS diff --git a/common/ihevc_cabac_tables.c b/common/ihevc_cabac_tables.c index fb10f3e..bb28d75 100644 --- a/common/ihevc_cabac_tables.c +++ b/common/ihevc_cabac_tables.c @@ -253,10 +253,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 33, 17, 49, 49, 110, 14, 49, 17, 49, 49, 110, 14, 49, 17, 49, 49, 110, 14, 49, 33, 17, 62, 62, 30, 30, 30, 30, 14, 30, 17, - 81, 30, 17, 81, 33, 33, 14, 1, 33, 30, - 1, 17, 14, 1, 78, 33, 17, 17, 1, 30, - 33, 110, 62, 62, 33, 110, 1, 78, 1, 14, - 30, 46, 30, 30, + 81, 30, 17, 81, 49, 
81, 33, 33, 14, 1, + 33, 30, 1, 17, 14, 1, 78, 33, 17, 17, + 1, 30, 33, 110, 62, 62, 33, 110, 1, 78, + 1, 14, 30, 46, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 81, 81, 81, 81, + }, { @@ -274,10 +276,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 31, 15, 47, 47, 110, 14, 47, 15, 47, 47, 110, 14, 47, 15, 47, 47, 110, 14, 47, 31, 15, 62, 62, 30, 32, 30, 32, 14, 32, 15, - 79, 32, 15, 79, 31, 29, 16, 0, 31, 30, - 0, 15, 14, 2, 78, 29, 15, 15, 0, 30, - 31, 110, 62, 62, 31, 108, 0, 78, 0, 14, - 32, 46, 30, 30, + 79, 32, 15, 79, 47, 79, 31, 29, 16, 0, + 31, 30, 0, 15, 14, 2, 78, 29, 15, 15, + 0, 30, 31, 110, 62, 62, 31, 108, 0, 78, + 0, 14, 32, 46, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 83, 83, 83, 83, + }, { @@ -295,10 +299,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 29, 13, 45, 47, 108, 14, 45, 13, 45, 47, 108, 14, 45, 13, 45, 47, 108, 14, 45, 31, 15, 60, 60, 30, 32, 30, 32, 14, 32, 15, - 77, 32, 15, 77, 31, 27, 16, 0, 31, 30, - 0, 15, 14, 6, 78, 27, 15, 13, 2, 30, - 31, 108, 62, 60, 31, 104, 2, 76, 0, 14, - 32, 46, 30, 30, + 77, 32, 15, 77, 47, 77, 31, 27, 16, 0, + 31, 30, 0, 15, 14, 6, 78, 27, 15, 13, + 2, 30, 31, 108, 62, 60, 31, 104, 2, 76, + 0, 14, 32, 46, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 87, 87, 87, 87, + }, { @@ -316,10 +322,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 29, 11, 45, 47, 108, 14, 45, 11, 45, 47, 108, 14, 45, 11, 45, 47, 108, 14, 45, 31, 15, 60, 60, 30, 32, 30, 32, 14, 32, 15, - 75, 32, 15, 75, 31, 25, 16, 0, 31, 30, - 0, 15, 14, 8, 78, 25, 15, 11, 2, 30, - 31, 108, 62, 60, 31, 102, 2, 74, 0, 14, - 32, 46, 30, 30, + 75, 32, 15, 75, 47, 75, 31, 25, 16, 0, + 31, 30, 0, 15, 14, 8, 78, 25, 15, 11, + 2, 30, 31, 108, 62, 60, 31, 102, 2, 74, + 0, 14, 32, 46, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 91, 91, 91, 91, + }, { @@ -337,10 +345,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 27, 9, 
43, 45, 106, 14, 43, 9, 43, 45, 106, 14, 43, 9, 43, 45, 106, 14, 43, 29, 13, 58, 58, 30, 34, 30, 34, 14, 34, 13, - 73, 34, 13, 73, 29, 23, 18, 2, 29, 30, - 2, 13, 14, 12, 78, 23, 13, 9, 4, 30, - 29, 106, 60, 58, 29, 98, 4, 72, 2, 14, - 34, 44, 30, 30, + 73, 34, 13, 73, 45, 73, 29, 23, 18, 2, + 29, 30, 2, 13, 14, 12, 78, 23, 13, 9, + 4, 30, 29, 106, 60, 58, 29, 98, 4, 72, + 2, 14, 34, 44, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 95, 95, 95, 95, + }, { @@ -358,10 +368,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 25, 7, 41, 45, 104, 14, 41, 7, 41, 45, 104, 14, 41, 7, 41, 45, 104, 14, 41, 29, 13, 56, 56, 30, 34, 30, 34, 14, 34, 13, - 71, 34, 13, 71, 29, 19, 18, 2, 29, 30, - 2, 13, 14, 14, 78, 19, 13, 7, 6, 30, - 29, 104, 60, 56, 29, 96, 6, 70, 2, 14, - 34, 44, 30, 30, + 71, 34, 13, 71, 45, 71, 29, 19, 18, 2, + 29, 30, 2, 13, 14, 14, 78, 19, 13, 7, + 6, 30, 29, 104, 60, 56, 29, 96, 6, 70, + 2, 14, 34, 44, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, + }, { @@ -379,10 +391,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 25, 5, 41, 45, 104, 14, 41, 5, 41, 45, 104, 14, 41, 5, 41, 45, 104, 14, 41, 29, 13, 56, 56, 30, 34, 30, 34, 14, 34, 13, - 69, 34, 13, 69, 29, 17, 18, 2, 29, 30, - 2, 13, 14, 18, 78, 17, 13, 5, 6, 30, - 29, 104, 60, 56, 29, 92, 6, 68, 2, 14, - 34, 44, 30, 30, + 69, 34, 13, 69, 45, 69, 29, 17, 18, 2, + 29, 30, 2, 13, 14, 18, 78, 17, 13, 5, + 6, 30, 29, 104, 60, 56, 29, 92, 6, 68, + 2, 14, 34, 44, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 103, 103, 103, 103, + }, { @@ -400,10 +414,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 23, 3, 39, 43, 102, 14, 39, 3, 39, 43, 102, 14, 39, 3, 39, 43, 102, 14, 39, 27, 11, 54, 54, 30, 36, 30, 36, 14, 36, 11, - 67, 36, 11, 67, 27, 15, 20, 4, 27, 30, - 4, 11, 14, 20, 78, 15, 11, 3, 8, 30, - 27, 102, 58, 54, 27, 90, 8, 66, 4, 14, - 36, 42, 30, 30, + 67, 36, 11, 67, 43, 67, 27, 15, 20, 4, + 27, 30, 
4, 11, 14, 20, 78, 15, 11, 3, + 8, 30, 27, 102, 58, 54, 27, 90, 8, 66, + 4, 14, 36, 42, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 107, 107, 107, 107, + }, { @@ -421,10 +437,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 23, 1, 39, 43, 100, 14, 39, 1, 39, 43, 100, 14, 39, 1, 39, 43, 100, 14, 39, 27, 11, 52, 52, 30, 36, 30, 36, 14, 36, 11, - 65, 36, 11, 65, 27, 13, 20, 4, 27, 30, - 4, 11, 14, 24, 78, 13, 11, 1, 8, 30, - 27, 100, 58, 52, 27, 86, 8, 64, 4, 14, - 36, 42, 30, 30, + 65, 36, 11, 65, 43, 65, 27, 13, 20, 4, + 27, 30, 4, 11, 14, 24, 78, 13, 11, 1, + 8, 30, 27, 100, 58, 52, 27, 86, 8, 64, + 4, 14, 36, 42, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 111, 111, 111, 111, + }, { @@ -442,10 +460,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 21, 0, 37, 43, 100, 14, 37, 0, 37, 43, 100, 14, 37, 0, 37, 43, 100, 14, 37, 27, 11, 52, 52, 30, 36, 30, 36, 14, 36, 11, - 63, 36, 11, 63, 27, 9, 20, 4, 27, 30, - 4, 11, 14, 28, 78, 9, 11, 0, 10, 30, - 27, 100, 58, 52, 27, 82, 10, 62, 4, 14, - 36, 42, 30, 30, + 63, 36, 11, 63, 43, 63, 27, 9, 20, 4, + 27, 30, 4, 11, 14, 28, 78, 9, 11, 0, + 10, 30, 27, 100, 58, 52, 27, 82, 10, 62, + 4, 14, 36, 42, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 113, 113, 113, 113, + }, { @@ -463,10 +483,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 19, 2, 35, 41, 98, 14, 35, 2, 35, 41, 98, 14, 35, 2, 35, 41, 98, 14, 35, 25, 9, 50, 50, 30, 38, 30, 38, 14, 38, 9, - 61, 38, 9, 61, 25, 7, 22, 6, 25, 30, - 6, 9, 14, 30, 78, 7, 9, 2, 12, 30, - 25, 98, 56, 50, 25, 80, 12, 60, 6, 14, - 38, 40, 30, 30, + 61, 38, 9, 61, 41, 61, 25, 7, 22, 6, + 25, 30, 6, 9, 14, 30, 78, 7, 9, 2, + 12, 30, 25, 98, 56, 50, 25, 80, 12, 60, + 6, 14, 38, 40, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 117, 117, 117, 117, + }, { @@ -484,10 +506,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 19, 4, 35, 41, 98, 14, 35, 4, 35, 41, 98, 14, 35, 
4, 35, 41, 98, 14, 35, 25, 9, 50, 50, 30, 38, 30, 38, 14, 38, 9, - 59, 38, 9, 59, 25, 5, 22, 6, 25, 30, - 6, 9, 14, 34, 78, 5, 9, 4, 12, 30, - 25, 98, 56, 50, 25, 76, 12, 58, 6, 14, - 38, 40, 30, 30, + 59, 38, 9, 59, 41, 59, 25, 5, 22, 6, + 25, 30, 6, 9, 14, 34, 78, 5, 9, 4, + 12, 30, 25, 98, 56, 50, 25, 76, 12, 58, + 6, 14, 38, 40, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 121, 121, 121, 121, + }, { @@ -505,10 +529,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 17, 6, 33, 41, 96, 14, 33, 6, 33, 41, 96, 14, 33, 6, 33, 41, 96, 14, 33, 25, 9, 48, 48, 30, 38, 30, 38, 14, 38, 9, - 57, 38, 9, 57, 25, 3, 22, 6, 25, 30, - 6, 9, 14, 36, 78, 3, 9, 6, 14, 30, - 25, 96, 56, 48, 25, 74, 14, 56, 6, 14, - 38, 40, 30, 30, + 57, 38, 9, 57, 41, 57, 25, 3, 22, 6, + 25, 30, 6, 9, 14, 36, 78, 3, 9, 6, + 14, 30, 25, 96, 56, 48, 25, 74, 14, 56, + 6, 14, 38, 40, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -526,10 +552,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 15, 8, 31, 39, 94, 14, 31, 8, 31, 39, 94, 14, 31, 8, 31, 39, 94, 14, 31, 23, 7, 46, 46, 30, 40, 30, 40, 14, 40, 7, - 55, 40, 7, 55, 23, 0, 24, 8, 23, 30, - 8, 7, 14, 40, 78, 0, 7, 8, 16, 30, - 23, 94, 54, 46, 23, 70, 16, 54, 8, 14, - 40, 38, 30, 30, + 55, 40, 7, 55, 39, 55, 23, 0, 24, 8, + 23, 30, 8, 7, 14, 40, 78, 0, 7, 8, + 16, 30, 23, 94, 54, 46, 23, 70, 16, 54, + 8, 14, 40, 38, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -547,10 +575,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 15, 10, 31, 39, 94, 14, 31, 10, 31, 39, 94, 14, 31, 10, 31, 39, 94, 14, 31, 23, 7, 46, 46, 30, 40, 30, 40, 14, 40, 7, - 53, 40, 7, 53, 23, 2, 24, 8, 23, 30, - 8, 7, 14, 42, 78, 2, 7, 10, 16, 30, - 23, 94, 54, 46, 23, 68, 16, 52, 8, 14, - 40, 38, 30, 30, + 53, 40, 7, 53, 39, 53, 23, 2, 24, 8, + 23, 30, 8, 7, 14, 42, 78, 2, 7, 10, + 16, 30, 23, 94, 54, 46, 23, 68, 16, 52, + 8, 14, 
40, 38, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -568,10 +598,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 13, 12, 29, 39, 92, 14, 29, 12, 29, 39, 92, 14, 29, 12, 29, 39, 92, 14, 29, 23, 7, 44, 44, 30, 40, 30, 40, 14, 40, 7, - 51, 40, 7, 51, 23, 4, 24, 8, 23, 30, - 8, 7, 14, 46, 78, 4, 7, 12, 18, 30, - 23, 92, 54, 44, 23, 64, 18, 50, 8, 14, - 40, 38, 30, 30, + 51, 40, 7, 51, 39, 51, 23, 4, 24, 8, + 23, 30, 8, 7, 14, 46, 78, 4, 7, 12, + 18, 30, 23, 92, 54, 44, 23, 64, 18, 50, + 8, 14, 40, 38, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -589,10 +621,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 13, 12, 29, 39, 90, 14, 29, 12, 29, 39, 90, 14, 29, 12, 29, 39, 90, 14, 29, 23, 7, 42, 42, 30, 40, 30, 40, 14, 40, 7, - 51, 40, 7, 51, 23, 6, 24, 8, 23, 30, - 8, 7, 14, 48, 78, 6, 7, 12, 18, 30, - 23, 90, 52, 42, 23, 60, 18, 48, 8, 14, - 40, 36, 30, 30, + 51, 40, 7, 51, 39, 51, 23, 6, 24, 8, + 23, 30, 8, 7, 14, 48, 78, 6, 7, 12, + 18, 30, 23, 90, 52, 42, 23, 60, 18, 48, + 8, 14, 40, 36, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -610,10 +644,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 11, 14, 27, 37, 90, 14, 27, 14, 27, 37, 90, 14, 27, 14, 27, 37, 90, 14, 27, 21, 5, 42, 42, 30, 42, 30, 42, 14, 42, 5, - 49, 42, 5, 49, 21, 10, 26, 10, 21, 30, - 10, 5, 14, 52, 78, 10, 5, 14, 20, 30, - 21, 90, 52, 42, 21, 58, 20, 48, 10, 14, - 42, 36, 30, 30, + 49, 42, 5, 49, 37, 49, 21, 10, 26, 10, + 21, 30, 10, 5, 14, 52, 78, 10, 5, 14, + 20, 30, 21, 90, 52, 42, 21, 58, 20, 48, + 10, 14, 42, 36, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -631,10 +667,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 9, 16, 25, 37, 88, 14, 25, 16, 25, 37, 88, 14, 25, 16, 25, 37, 88, 14, 25, 21, 5, 40, 40, 30, 42, 30, 42, 14, 42, 5, - 47, 42, 5, 
47, 21, 12, 26, 10, 21, 30, - 10, 5, 14, 56, 78, 12, 5, 16, 22, 30, - 21, 88, 52, 40, 21, 54, 22, 46, 10, 14, - 42, 36, 30, 30, + 47, 42, 5, 47, 37, 47, 21, 12, 26, 10, + 21, 30, 10, 5, 14, 56, 78, 12, 5, 16, + 22, 30, 21, 88, 52, 40, 21, 54, 22, 46, + 10, 14, 42, 36, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -652,10 +690,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 9, 18, 25, 37, 88, 14, 25, 18, 25, 37, 88, 14, 25, 18, 25, 37, 88, 14, 25, 21, 5, 40, 40, 30, 42, 30, 42, 14, 42, 5, - 45, 42, 5, 45, 21, 14, 26, 10, 21, 30, - 10, 5, 14, 58, 78, 14, 5, 18, 22, 30, - 21, 88, 52, 40, 21, 52, 22, 44, 10, 14, - 42, 36, 30, 30, + 45, 42, 5, 45, 37, 45, 21, 14, 26, 10, + 21, 30, 10, 5, 14, 58, 78, 14, 5, 18, + 22, 30, 21, 88, 52, 40, 21, 52, 22, 44, + 10, 14, 42, 36, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -673,10 +713,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 7, 20, 23, 35, 86, 14, 23, 20, 23, 35, 86, 14, 23, 20, 23, 35, 86, 14, 23, 19, 3, 38, 38, 30, 44, 30, 44, 14, 44, 3, - 43, 44, 3, 43, 19, 16, 28, 12, 19, 30, - 12, 3, 14, 62, 78, 16, 3, 20, 24, 30, - 19, 86, 50, 38, 19, 48, 24, 42, 12, 14, - 44, 34, 30, 30, + 43, 44, 3, 43, 35, 43, 19, 16, 28, 12, + 19, 30, 12, 3, 14, 62, 78, 16, 3, 20, + 24, 30, 19, 86, 50, 38, 19, 48, 24, 42, + 12, 14, 44, 34, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -694,10 +736,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 5, 22, 21, 35, 84, 14, 21, 22, 21, 35, 84, 14, 21, 22, 21, 35, 84, 14, 21, 19, 3, 36, 36, 30, 44, 30, 44, 14, 44, 3, - 41, 44, 3, 41, 19, 20, 28, 12, 19, 30, - 12, 3, 14, 64, 78, 20, 3, 22, 26, 30, - 19, 84, 50, 36, 19, 46, 26, 40, 12, 14, - 44, 34, 30, 30, + 41, 44, 3, 41, 35, 41, 19, 20, 28, 12, + 19, 30, 12, 3, 14, 64, 78, 20, 3, 22, + 26, 30, 19, 84, 50, 36, 19, 46, 26, 40, + 12, 14, 44, 34, 30, 30, 1, 1, 1, 1, + 
1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -715,10 +759,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 5, 24, 21, 35, 84, 14, 21, 24, 21, 35, 84, 14, 21, 24, 21, 35, 84, 14, 21, 19, 3, 36, 36, 30, 44, 30, 44, 14, 44, 3, - 39, 44, 3, 39, 19, 22, 28, 12, 19, 30, - 12, 3, 14, 68, 78, 22, 3, 24, 26, 30, - 19, 84, 50, 36, 19, 42, 26, 38, 12, 14, - 44, 34, 30, 30, + 39, 44, 3, 39, 35, 39, 19, 22, 28, 12, + 19, 30, 12, 3, 14, 68, 78, 22, 3, 24, + 26, 30, 19, 84, 50, 36, 19, 42, 26, 38, + 12, 14, 44, 34, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -736,10 +782,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 3, 26, 19, 33, 82, 14, 19, 26, 19, 33, 82, 14, 19, 26, 19, 33, 82, 14, 19, 17, 1, 34, 34, 30, 46, 30, 46, 14, 46, 1, - 37, 46, 1, 37, 17, 24, 30, 14, 17, 30, - 14, 1, 14, 70, 78, 24, 1, 26, 28, 30, - 17, 82, 48, 34, 17, 40, 28, 36, 14, 14, - 46, 32, 30, 30, + 37, 46, 1, 37, 33, 37, 17, 24, 30, 14, + 17, 30, 14, 1, 14, 70, 78, 24, 1, 26, + 28, 30, 17, 82, 48, 34, 17, 40, 28, 36, + 14, 14, 46, 32, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -757,10 +805,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 3, 28, 19, 33, 80, 14, 19, 28, 19, 33, 80, 14, 19, 28, 19, 33, 80, 14, 19, 17, 1, 32, 32, 30, 46, 30, 46, 14, 46, 1, - 35, 46, 1, 35, 17, 26, 30, 14, 17, 30, - 14, 1, 14, 74, 78, 26, 1, 28, 28, 30, - 17, 80, 48, 32, 17, 36, 28, 34, 14, 14, - 46, 32, 30, 30, + 35, 46, 1, 35, 33, 35, 17, 26, 30, 14, + 17, 30, 14, 1, 14, 74, 78, 26, 1, 28, + 28, 30, 17, 80, 48, 32, 17, 36, 28, 34, + 14, 14, 46, 32, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -778,10 +828,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 1, 30, 17, 33, 80, 14, 17, 30, 17, 33, 80, 14, 17, 30, 17, 33, 80, 14, 17, 17, 1, 32, 32, 30, 46, 30, 46, 14, 46, 1, - 33, 46, 1, 33, 17, 30, 
30, 14, 17, 30, - 14, 1, 14, 78, 78, 30, 1, 30, 30, 30, - 17, 80, 48, 32, 17, 32, 30, 32, 14, 14, - 46, 32, 30, 30, + 33, 46, 1, 33, 33, 33, 17, 30, 30, 14, + 17, 30, 14, 1, 14, 78, 78, 30, 1, 30, + 30, 30, 17, 80, 48, 32, 17, 32, 30, 32, + 14, 14, 46, 32, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -799,10 +851,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 0, 32, 15, 31, 78, 14, 15, 32, 15, 31, 78, 14, 15, 32, 15, 31, 78, 14, 15, 15, 0, 30, 30, 30, 48, 30, 48, 14, 48, 0, - 31, 48, 0, 31, 15, 32, 32, 16, 15, 30, - 16, 0, 14, 80, 78, 32, 0, 32, 32, 30, - 15, 78, 46, 30, 15, 30, 32, 30, 16, 14, - 48, 30, 30, 30, + 31, 48, 0, 31, 31, 31, 15, 32, 32, 16, + 15, 30, 16, 0, 14, 80, 78, 32, 0, 32, + 32, 30, 15, 78, 46, 30, 15, 30, 32, 30, + 16, 14, 48, 30, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -820,10 +874,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 0, 34, 15, 31, 78, 14, 15, 34, 15, 31, 78, 14, 15, 34, 15, 31, 78, 14, 15, 15, 0, 30, 30, 30, 48, 30, 48, 14, 48, 0, - 29, 48, 0, 29, 15, 34, 32, 16, 15, 30, - 16, 0, 14, 84, 78, 34, 0, 34, 32, 30, - 15, 78, 46, 30, 15, 26, 32, 28, 16, 14, - 48, 30, 30, 30, + 29, 48, 0, 29, 31, 29, 15, 34, 32, 16, + 15, 30, 16, 0, 14, 84, 78, 34, 0, 34, + 32, 30, 15, 78, 46, 30, 15, 26, 32, 28, + 16, 14, 48, 30, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -841,10 +897,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 2, 36, 13, 31, 76, 14, 13, 36, 13, 31, 76, 14, 13, 36, 13, 31, 76, 14, 13, 15, 0, 28, 28, 30, 48, 30, 48, 14, 48, 0, - 27, 48, 0, 27, 15, 36, 32, 16, 15, 30, - 16, 0, 14, 86, 78, 36, 0, 36, 34, 30, - 15, 76, 46, 28, 15, 24, 34, 26, 16, 14, - 48, 30, 30, 30, + 27, 48, 0, 27, 31, 27, 15, 36, 32, 16, + 15, 30, 16, 0, 14, 86, 78, 36, 0, 36, + 34, 30, 15, 76, 46, 28, 15, 24, 34, 26, + 16, 14, 48, 30, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 
1, 1, 125, 125, 125, 125, + }, { @@ -862,10 +920,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 4, 38, 11, 29, 74, 14, 11, 38, 11, 29, 74, 14, 11, 38, 11, 29, 74, 14, 11, 13, 2, 26, 26, 30, 50, 30, 50, 14, 50, 2, - 25, 50, 2, 25, 13, 40, 34, 18, 13, 30, - 18, 2, 14, 90, 78, 40, 2, 38, 36, 30, - 13, 74, 44, 26, 13, 20, 36, 24, 18, 14, - 50, 28, 30, 30, + 25, 50, 2, 25, 29, 25, 13, 40, 34, 18, + 13, 30, 18, 2, 14, 90, 78, 40, 2, 38, + 36, 30, 13, 74, 44, 26, 13, 20, 36, 24, + 18, 14, 50, 28, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -883,10 +943,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 4, 40, 11, 29, 74, 14, 11, 40, 11, 29, 74, 14, 11, 40, 11, 29, 74, 14, 11, 13, 2, 26, 26, 30, 50, 30, 50, 14, 50, 2, - 23, 50, 2, 23, 13, 42, 34, 18, 13, 30, - 18, 2, 14, 92, 78, 42, 2, 40, 36, 30, - 13, 74, 44, 26, 13, 18, 36, 22, 18, 14, - 50, 28, 30, 30, + 23, 50, 2, 23, 29, 23, 13, 42, 34, 18, + 13, 30, 18, 2, 14, 92, 78, 42, 2, 40, + 36, 30, 13, 74, 44, 26, 13, 18, 36, 22, + 18, 14, 50, 28, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -904,10 +966,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 6, 42, 9, 29, 72, 14, 9, 42, 9, 29, 72, 14, 9, 42, 9, 29, 72, 14, 9, 13, 2, 24, 24, 30, 50, 30, 50, 14, 50, 2, - 21, 50, 2, 21, 13, 44, 34, 18, 13, 30, - 18, 2, 14, 96, 78, 44, 2, 42, 38, 30, - 13, 72, 44, 24, 13, 14, 38, 20, 18, 14, - 50, 28, 30, 30, + 21, 50, 2, 21, 29, 21, 13, 44, 34, 18, + 13, 30, 18, 2, 14, 96, 78, 44, 2, 42, + 38, 30, 13, 72, 44, 24, 13, 14, 38, 20, + 18, 14, 50, 28, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -925,10 +989,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 6, 42, 9, 29, 70, 14, 9, 42, 9, 29, 70, 14, 9, 42, 9, 29, 70, 14, 9, 13, 2, 22, 22, 30, 50, 30, 50, 14, 50, 2, - 21, 50, 2, 21, 13, 46, 34, 18, 13, 30, - 18, 2, 
14, 98, 78, 46, 2, 42, 38, 30, - 13, 70, 42, 22, 13, 10, 38, 18, 18, 14, - 50, 26, 30, 30, + 21, 50, 2, 21, 29, 21, 13, 46, 34, 18, + 13, 30, 18, 2, 14, 98, 78, 46, 2, 42, + 38, 30, 13, 70, 42, 22, 13, 10, 38, 18, + 18, 14, 50, 26, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -946,10 +1012,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 8, 44, 7, 27, 70, 14, 7, 44, 7, 27, 70, 14, 7, 44, 7, 27, 70, 14, 7, 11, 4, 22, 22, 30, 52, 30, 52, 14, 52, 4, - 19, 52, 4, 19, 11, 50, 36, 20, 11, 30, - 20, 4, 14, 102, 78, 50, 4, 44, 40, 30, - 11, 70, 42, 22, 11, 8, 40, 18, 20, 14, - 52, 26, 30, 30, + 19, 52, 4, 19, 27, 19, 11, 50, 36, 20, + 11, 30, 20, 4, 14, 102, 78, 50, 4, 44, + 40, 30, 11, 70, 42, 22, 11, 8, 40, 18, + 20, 14, 52, 26, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -967,10 +1035,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 10, 46, 5, 27, 68, 14, 5, 46, 5, 27, 68, 14, 5, 46, 5, 27, 68, 14, 5, 11, 4, 20, 20, 30, 52, 30, 52, 14, 52, 4, - 17, 52, 4, 17, 11, 52, 36, 20, 11, 30, - 20, 4, 14, 106, 78, 52, 4, 46, 42, 30, - 11, 68, 42, 20, 11, 4, 42, 16, 20, 14, - 52, 26, 30, 30, + 17, 52, 4, 17, 27, 17, 11, 52, 36, 20, + 11, 30, 20, 4, 14, 106, 78, 52, 4, 46, + 42, 30, 11, 68, 42, 20, 11, 4, 42, 16, + 20, 14, 52, 26, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -988,10 +1058,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 10, 48, 5, 27, 68, 14, 5, 48, 5, 27, 68, 14, 5, 48, 5, 27, 68, 14, 5, 11, 4, 20, 20, 30, 52, 30, 52, 14, 52, 4, - 15, 52, 4, 15, 11, 54, 36, 20, 11, 30, - 20, 4, 14, 108, 78, 54, 4, 48, 42, 30, - 11, 68, 42, 20, 11, 2, 42, 14, 20, 14, - 52, 26, 30, 30, + 15, 52, 4, 15, 27, 15, 11, 54, 36, 20, + 11, 30, 20, 4, 14, 108, 78, 54, 4, 48, + 42, 30, 11, 68, 42, 20, 11, 2, 42, 14, + 20, 14, 52, 26, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ 
-1009,10 +1081,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 12, 50, 3, 25, 66, 14, 3, 50, 3, 25, 66, 14, 3, 50, 3, 25, 66, 14, 3, 9, 6, 18, 18, 30, 54, 30, 54, 14, 54, 6, - 13, 54, 6, 13, 9, 56, 38, 22, 9, 30, - 22, 6, 14, 112, 78, 56, 6, 50, 44, 30, - 9, 66, 40, 18, 9, 1, 44, 12, 22, 14, - 54, 24, 30, 30, + 13, 54, 6, 13, 25, 13, 9, 56, 38, 22, + 9, 30, 22, 6, 14, 112, 78, 56, 6, 50, + 44, 30, 9, 66, 40, 18, 9, 1, 44, 12, + 22, 14, 54, 24, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1030,10 +1104,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 52, 1, 25, 64, 14, 1, 52, 1, 25, 64, 14, 1, 52, 1, 25, 64, 14, 1, 9, 6, 16, 16, 30, 54, 30, 54, 14, 54, 6, - 11, 54, 6, 11, 9, 60, 38, 22, 9, 30, - 22, 6, 14, 114, 78, 60, 6, 52, 46, 30, - 9, 64, 40, 16, 9, 3, 46, 10, 22, 14, - 54, 24, 30, 30, + 11, 54, 6, 11, 25, 11, 9, 60, 38, 22, + 9, 30, 22, 6, 14, 114, 78, 60, 6, 52, + 46, 30, 9, 64, 40, 16, 9, 3, 46, 10, + 22, 14, 54, 24, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1051,10 +1127,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 54, 1, 25, 64, 14, 1, 54, 1, 25, 64, 14, 1, 54, 1, 25, 64, 14, 1, 9, 6, 16, 16, 30, 54, 30, 54, 14, 54, 6, - 9, 54, 6, 9, 9, 62, 38, 22, 9, 30, - 22, 6, 14, 118, 78, 62, 6, 54, 46, 30, - 9, 64, 40, 16, 9, 7, 46, 8, 22, 14, - 54, 24, 30, 30, + 9, 54, 6, 9, 25, 9, 9, 62, 38, 22, + 9, 30, 22, 6, 14, 118, 78, 62, 6, 54, + 46, 30, 9, 64, 40, 16, 9, 7, 46, 8, + 22, 14, 54, 24, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1072,10 +1150,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 16, 56, 0, 23, 62, 14, 0, 56, 0, 23, 62, 14, 0, 56, 0, 23, 62, 14, 0, 7, 8, 14, 14, 30, 56, 30, 56, 14, 56, 8, - 7, 56, 8, 7, 7, 64, 40, 24, 7, 30, - 24, 8, 14, 120, 78, 64, 8, 56, 48, 30, - 7, 62, 38, 14, 7, 9, 48, 6, 24, 14, - 56, 
22, 30, 30, + 7, 56, 8, 7, 23, 7, 7, 64, 40, 24, + 7, 30, 24, 8, 14, 120, 78, 64, 8, 56, + 48, 30, 7, 62, 38, 14, 7, 9, 48, 6, + 24, 14, 56, 22, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1093,10 +1173,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 16, 58, 0, 23, 60, 14, 0, 58, 0, 23, 60, 14, 0, 58, 0, 23, 60, 14, 0, 7, 8, 12, 12, 30, 56, 30, 56, 14, 56, 8, - 5, 56, 8, 5, 7, 66, 40, 24, 7, 30, - 24, 8, 14, 124, 78, 66, 8, 58, 48, 30, - 7, 60, 38, 12, 7, 13, 48, 4, 24, 14, - 56, 22, 30, 30, + 5, 56, 8, 5, 23, 5, 7, 66, 40, 24, + 7, 30, 24, 8, 14, 124, 78, 66, 8, 58, + 48, 30, 7, 60, 38, 12, 7, 13, 48, 4, + 24, 14, 56, 22, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1114,10 +1196,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 18, 60, 2, 23, 60, 14, 2, 60, 2, 23, 60, 14, 2, 60, 2, 23, 60, 14, 2, 7, 8, 12, 12, 30, 56, 30, 56, 14, 56, 8, - 3, 56, 8, 3, 7, 70, 40, 24, 7, 30, - 24, 8, 14, 124, 78, 70, 8, 60, 50, 30, - 7, 60, 38, 12, 7, 17, 50, 2, 24, 14, - 56, 22, 30, 30, + 3, 56, 8, 3, 23, 3, 7, 70, 40, 24, + 7, 30, 24, 8, 14, 124, 78, 70, 8, 60, + 50, 30, 7, 60, 38, 12, 7, 17, 50, 2, + 24, 14, 56, 22, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1135,10 +1219,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 20, 62, 4, 21, 58, 14, 4, 62, 4, 21, 58, 14, 4, 62, 4, 21, 58, 14, 4, 5, 10, 10, 10, 30, 58, 30, 58, 14, 58, 10, - 1, 58, 10, 1, 5, 72, 42, 26, 5, 30, - 26, 10, 14, 124, 78, 72, 10, 62, 52, 30, - 5, 58, 36, 10, 5, 19, 52, 0, 26, 14, - 58, 20, 30, 30, + 1, 58, 10, 1, 21, 1, 5, 72, 42, 26, + 5, 30, 26, 10, 14, 124, 78, 72, 10, 62, + 52, 30, 5, 58, 36, 10, 5, 19, 52, 0, + 26, 14, 58, 20, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1156,10 +1242,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 20, 64, 4, 21, 58, 
14, 4, 64, 4, 21, 58, 14, 4, 64, 4, 21, 58, 14, 4, 5, 10, 10, 10, 30, 58, 30, 58, 14, 58, 10, - 0, 58, 10, 0, 5, 74, 42, 26, 5, 30, - 26, 10, 14, 124, 78, 74, 10, 64, 52, 30, - 5, 58, 36, 10, 5, 23, 52, 1, 26, 14, - 58, 20, 30, 30, + 0, 58, 10, 0, 21, 0, 5, 74, 42, 26, + 5, 30, 26, 10, 14, 124, 78, 74, 10, 64, + 52, 30, 5, 58, 36, 10, 5, 23, 52, 1, + 26, 14, 58, 20, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1177,10 +1265,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 22, 66, 6, 21, 56, 14, 6, 66, 6, 21, 56, 14, 6, 66, 6, 21, 56, 14, 6, 5, 10, 8, 8, 30, 58, 30, 58, 14, 58, 10, - 2, 58, 10, 2, 5, 76, 42, 26, 5, 30, - 26, 10, 14, 124, 78, 76, 10, 66, 54, 30, - 5, 56, 36, 8, 5, 25, 54, 3, 26, 14, - 58, 20, 30, 30, + 2, 58, 10, 2, 21, 2, 5, 76, 42, 26, + 5, 30, 26, 10, 14, 124, 78, 76, 10, 66, + 54, 30, 5, 56, 36, 8, 5, 25, 54, 3, + 26, 14, 58, 20, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1198,10 +1288,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 24, 68, 8, 19, 54, 14, 8, 68, 8, 19, 54, 14, 8, 68, 8, 19, 54, 14, 8, 3, 12, 6, 6, 30, 60, 30, 60, 14, 60, 12, - 4, 60, 12, 4, 3, 80, 44, 28, 3, 30, - 28, 12, 14, 124, 78, 80, 12, 68, 56, 30, - 3, 54, 34, 6, 3, 29, 56, 5, 28, 14, - 60, 18, 30, 30, + 4, 60, 12, 4, 19, 4, 3, 80, 44, 28, + 3, 30, 28, 12, 14, 124, 78, 80, 12, 68, + 56, 30, 3, 54, 34, 6, 3, 29, 56, 5, + 28, 14, 60, 18, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1219,10 +1311,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 24, 70, 8, 19, 54, 14, 8, 70, 8, 19, 54, 14, 8, 70, 8, 19, 54, 14, 8, 3, 12, 6, 6, 30, 60, 30, 60, 14, 60, 12, - 6, 60, 12, 6, 3, 82, 44, 28, 3, 30, - 28, 12, 14, 124, 78, 82, 12, 70, 56, 30, - 3, 54, 34, 6, 3, 31, 56, 7, 28, 14, - 60, 18, 30, 30, + 6, 60, 12, 6, 19, 6, 3, 82, 44, 28, + 3, 30, 28, 12, 14, 124, 78, 82, 12, 70, + 56, 30, 3, 54, 
34, 6, 3, 31, 56, 7, + 28, 14, 60, 18, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1240,10 +1334,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 26, 72, 10, 19, 52, 14, 10, 72, 10, 19, 52, 14, 10, 72, 10, 19, 52, 14, 10, 3, 12, 4, 4, 30, 60, 30, 60, 14, 60, 12, - 8, 60, 12, 8, 3, 84, 44, 28, 3, 30, - 28, 12, 14, 124, 78, 84, 12, 72, 58, 30, - 3, 52, 34, 4, 3, 35, 58, 9, 28, 14, - 60, 18, 30, 30, + 8, 60, 12, 8, 19, 8, 3, 84, 44, 28, + 3, 30, 28, 12, 14, 124, 78, 84, 12, 72, + 58, 30, 3, 52, 34, 4, 3, 35, 58, 9, + 28, 14, 60, 18, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1261,10 +1357,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 26, 72, 10, 19, 50, 14, 10, 72, 10, 19, 50, 14, 10, 72, 10, 19, 50, 14, 10, 3, 12, 2, 2, 30, 60, 30, 60, 14, 60, 12, - 8, 60, 12, 8, 3, 86, 44, 28, 3, 30, - 28, 12, 14, 124, 78, 86, 12, 72, 58, 30, - 3, 50, 32, 2, 3, 39, 58, 11, 28, 14, - 60, 16, 30, 30, + 8, 60, 12, 8, 19, 8, 3, 86, 44, 28, + 3, 30, 28, 12, 14, 124, 78, 86, 12, 72, + 58, 30, 3, 50, 32, 2, 3, 39, 58, 11, + 28, 14, 60, 16, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1282,10 +1380,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 28, 74, 12, 17, 50, 14, 12, 74, 12, 17, 50, 14, 12, 74, 12, 17, 50, 14, 12, 1, 14, 2, 2, 30, 62, 30, 62, 14, 62, 14, - 10, 62, 14, 10, 1, 90, 46, 30, 1, 30, - 30, 14, 14, 124, 78, 90, 14, 74, 60, 30, - 1, 50, 32, 2, 1, 41, 60, 11, 30, 14, - 62, 16, 30, 30, + 10, 62, 14, 10, 17, 10, 1, 90, 46, 30, + 1, 30, 30, 14, 14, 124, 78, 90, 14, 74, + 60, 30, 1, 50, 32, 2, 1, 41, 60, 11, + 30, 14, 62, 16, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1303,10 +1403,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 30, 76, 14, 17, 48, 14, 14, 76, 14, 17, 48, 14, 14, 76, 14, 17, 48, 14, 14, 1, 14, 0, 0, 30, 
62, 30, 62, 14, 62, 14, - 12, 62, 14, 12, 1, 92, 46, 30, 1, 30, - 30, 14, 14, 124, 78, 92, 14, 76, 62, 30, - 1, 48, 32, 0, 1, 45, 62, 13, 30, 14, - 62, 16, 30, 30, + 12, 62, 14, 12, 17, 12, 1, 92, 46, 30, + 1, 30, 30, 14, 14, 124, 78, 92, 14, 76, + 62, 30, 1, 48, 32, 0, 1, 45, 62, 13, + 30, 14, 62, 16, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, { @@ -1324,10 +1426,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 30, 78, 14, 17, 48, 14, 14, 78, 14, 17, 48, 14, 14, 78, 14, 17, 48, 14, 14, 1, 14, 0, 0, 30, 62, 30, 62, 14, 62, 14, - 14, 62, 14, 14, 1, 94, 46, 30, 1, 30, - 30, 14, 14, 124, 78, 94, 14, 78, 62, 30, - 1, 48, 32, 0, 1, 47, 62, 15, 30, 14, - 62, 16, 30, 30, + 14, 62, 14, 14, 17, 14, 1, 94, 46, 30, + 1, 30, 30, 14, 14, 124, 78, 94, 14, 78, + 62, 30, 1, 48, 32, 0, 1, 47, 62, 15, + 30, 14, 62, 16, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 125, 125, 125, 125, + }, }, @@ -1348,10 +1452,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 62, 46, 33, 30, 14, 1, 62, 46, 33, 30, 14, 1, 62, 46, 33, 30, 14, 1, 1, 14, 17, 17, 17, 14, 17, 14, 46, 46, 46, - 33, 46, 46, 33, 1, 94, 94, 46, 1, 30, - 46, 62, 62, 62, 78, 30, 14, 14, 30, 14, - 14, 124, 62, 46, 1, 46, 14, 62, 17, 46, - 17, 1, 17, 46, + 33, 46, 46, 33, 33, 33, 1, 94, 94, 46, + 1, 30, 46, 62, 62, 62, 78, 30, 14, 14, + 30, 14, 14, 124, 62, 46, 1, 46, 14, 62, + 17, 46, 17, 1, 17, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 17, 17, 17, 17, + }, { @@ -1369,10 +1475,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 62, 46, 31, 32, 14, 1, 62, 46, 31, 32, 14, 1, 62, 46, 31, 32, 14, 1, 1, 14, 15, 15, 15, 16, 15, 16, 46, 46, 46, - 31, 46, 46, 31, 1, 94, 94, 46, 1, 30, - 46, 62, 62, 64, 78, 32, 14, 16, 32, 16, - 14, 124, 62, 46, 1, 46, 16, 62, 15, 46, - 13, 0, 15, 46, + 31, 46, 46, 31, 31, 31, 1, 94, 94, 46, + 1, 30, 46, 62, 62, 64, 78, 32, 14, 16, + 32, 16, 14, 124, 62, 46, 1, 46, 16, 62, + 15, 
46, 13, 0, 15, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, + }, { @@ -1390,10 +1498,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 1, 14, 13, 13, 13, 18, 13, 18, 46, 46, 44, - 31, 46, 44, 31, 1, 92, 92, 46, 1, 30, - 46, 60, 60, 64, 78, 32, 14, 18, 32, 16, - 14, 124, 62, 46, 1, 46, 16, 60, 13, 46, - 11, 2, 13, 46, + 31, 46, 44, 31, 31, 31, 1, 92, 92, 46, + 1, 30, 46, 60, 60, 64, 78, 32, 14, 18, + 32, 16, 14, 124, 62, 46, 1, 46, 16, 60, + 13, 46, 11, 2, 13, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, + }, { @@ -1411,10 +1521,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 1, 14, 13, 13, 11, 18, 11, 18, 46, 46, 44, - 31, 46, 44, 31, 1, 90, 90, 46, 1, 30, - 46, 60, 60, 64, 78, 32, 14, 18, 32, 16, - 14, 122, 62, 46, 1, 46, 16, 60, 11, 46, - 9, 2, 11, 46, + 31, 46, 44, 31, 31, 31, 1, 90, 90, 46, + 1, 30, 46, 60, 60, 64, 78, 32, 14, 18, + 32, 16, 14, 122, 62, 46, 1, 46, 16, 60, + 11, 46, 9, 2, 11, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, + }, { @@ -1432,10 +1544,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 60, 42, 29, 34, 14, 1, 60, 42, 29, 34, 14, 1, 60, 42, 29, 34, 14, 1, 3, 14, 11, 11, 9, 20, 9, 20, 44, 46, 42, - 29, 46, 42, 29, 1, 88, 88, 44, 1, 30, - 44, 58, 58, 66, 78, 34, 14, 20, 34, 18, - 12, 120, 60, 44, 1, 44, 18, 58, 9, 44, - 7, 4, 9, 44, + 29, 46, 42, 29, 29, 29, 1, 88, 88, 44, + 1, 30, 44, 58, 58, 66, 78, 34, 14, 20, + 34, 18, 12, 120, 60, 44, 1, 44, 18, 58, + 9, 44, 7, 4, 9, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, + }, { @@ -1453,10 +1567,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 3, 14, 9, 9, 7, 22, 7, 22, 44, 46, 40, - 29, 46, 40, 29, 1, 
86, 86, 44, 1, 30, - 44, 56, 56, 66, 78, 34, 14, 22, 34, 18, - 12, 118, 60, 44, 1, 44, 18, 56, 7, 44, - 3, 6, 7, 44, + 29, 46, 40, 29, 29, 29, 1, 86, 86, 44, + 1, 30, 44, 56, 56, 66, 78, 34, 14, 22, + 34, 18, 12, 118, 60, 44, 1, 44, 18, 56, + 7, 44, 3, 6, 7, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, + }, { @@ -1474,10 +1590,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 3, 14, 9, 9, 5, 22, 5, 22, 44, 46, 40, - 29, 46, 40, 29, 1, 84, 84, 44, 1, 30, - 44, 56, 56, 66, 78, 34, 14, 22, 34, 18, - 12, 116, 60, 44, 1, 44, 18, 56, 5, 44, - 1, 6, 5, 44, + 29, 46, 40, 29, 29, 29, 1, 84, 84, 44, + 1, 30, 44, 56, 56, 66, 78, 34, 14, 22, + 34, 18, 12, 116, 60, 44, 1, 44, 18, 56, + 5, 44, 1, 6, 5, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, + }, { @@ -1495,10 +1613,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 58, 38, 27, 36, 14, 1, 58, 38, 27, 36, 14, 1, 58, 38, 27, 36, 14, 1, 5, 14, 7, 7, 3, 24, 3, 24, 42, 46, 38, - 27, 46, 38, 27, 1, 82, 82, 42, 1, 30, - 42, 54, 54, 68, 78, 36, 14, 24, 36, 20, - 10, 114, 58, 42, 1, 42, 20, 54, 3, 42, - 0, 8, 3, 42, + 27, 46, 38, 27, 27, 27, 1, 82, 82, 42, + 1, 30, 42, 54, 54, 68, 78, 36, 14, 24, + 36, 20, 10, 114, 58, 42, 1, 42, 20, 54, + 3, 42, 0, 8, 3, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 11, 11, 11, 11, + }, { @@ -1516,10 +1636,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 5, 14, 7, 7, 1, 24, 1, 24, 42, 46, 36, - 27, 46, 36, 27, 1, 80, 80, 42, 1, 30, - 42, 52, 52, 68, 78, 36, 14, 24, 36, 20, - 10, 112, 58, 42, 1, 42, 20, 52, 1, 42, - 2, 8, 1, 42, + 27, 46, 36, 27, 27, 27, 1, 80, 80, 42, + 1, 30, 42, 52, 52, 68, 78, 36, 14, 24, + 36, 20, 10, 112, 58, 42, 1, 42, 20, 52, + 1, 42, 2, 8, 1, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 11, 11, 11, 11, + }, { @@ -1537,10 
+1659,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 5, 14, 5, 5, 0, 26, 0, 26, 42, 46, 36, - 27, 46, 36, 27, 1, 78, 78, 42, 1, 30, - 42, 52, 52, 68, 78, 36, 14, 26, 36, 20, - 10, 110, 58, 42, 1, 42, 20, 52, 0, 42, - 6, 10, 0, 42, + 27, 46, 36, 27, 27, 27, 1, 78, 78, 42, + 1, 30, 42, 52, 52, 68, 78, 36, 14, 26, + 36, 20, 10, 110, 58, 42, 1, 42, 20, 52, + 0, 42, 6, 10, 0, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 11, 11, 11, 11, + }, { @@ -1558,10 +1682,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 7, 14, 3, 3, 2, 28, 2, 28, 40, 46, 34, - 25, 46, 34, 25, 1, 76, 76, 40, 1, 30, - 40, 50, 50, 70, 78, 38, 14, 28, 38, 22, - 8, 108, 56, 40, 1, 40, 22, 50, 2, 40, - 8, 12, 2, 40, + 25, 46, 34, 25, 25, 25, 1, 76, 76, 40, + 1, 30, 40, 50, 50, 70, 78, 38, 14, 28, + 38, 22, 8, 108, 56, 40, 1, 40, 22, 50, + 2, 40, 8, 12, 2, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, + }, { @@ -1579,10 +1705,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 7, 14, 3, 3, 4, 28, 4, 28, 40, 46, 34, - 25, 46, 34, 25, 1, 74, 74, 40, 1, 30, - 40, 50, 50, 70, 78, 38, 14, 28, 38, 22, - 8, 106, 56, 40, 1, 40, 22, 50, 4, 40, - 10, 12, 4, 40, + 25, 46, 34, 25, 25, 25, 1, 74, 74, 40, + 1, 30, 40, 50, 50, 70, 78, 38, 14, 28, + 38, 22, 8, 106, 56, 40, 1, 40, 22, 50, + 4, 40, 10, 12, 4, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, + }, { @@ -1600,10 +1728,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 56, 32, 25, 38, 14, 1, 56, 32, 25, 38, 14, 1, 56, 32, 25, 38, 14, 1, 7, 14, 1, 1, 6, 30, 6, 30, 40, 46, 32, - 25, 46, 32, 25, 1, 72, 72, 40, 1, 30, - 40, 48, 48, 70, 78, 38, 14, 30, 38, 22, - 8, 104, 56, 40, 1, 40, 22, 48, 6, 40, - 12, 
14, 6, 40, + 25, 46, 32, 25, 25, 25, 1, 72, 72, 40, + 1, 30, 40, 48, 48, 70, 78, 38, 14, 30, + 38, 22, 8, 104, 56, 40, 1, 40, 22, 48, + 6, 40, 12, 14, 6, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, + }, { @@ -1621,10 +1751,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 9, 14, 0, 0, 8, 32, 8, 32, 38, 46, 30, - 23, 46, 30, 23, 1, 70, 70, 38, 1, 30, - 38, 46, 46, 72, 78, 40, 14, 32, 40, 24, - 6, 102, 54, 38, 1, 38, 24, 46, 8, 38, - 16, 16, 8, 38, + 23, 46, 30, 23, 23, 23, 1, 70, 70, 38, + 1, 30, 38, 46, 46, 72, 78, 40, 14, 32, + 40, 24, 6, 102, 54, 38, 1, 38, 24, 46, + 8, 38, 16, 16, 8, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -1642,10 +1774,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 9, 14, 0, 0, 10, 32, 10, 32, 38, 46, 30, - 23, 46, 30, 23, 1, 68, 68, 38, 1, 30, - 38, 46, 46, 72, 78, 40, 14, 32, 40, 24, - 6, 100, 54, 38, 1, 38, 24, 46, 10, 38, - 18, 16, 10, 38, + 23, 46, 30, 23, 23, 23, 1, 68, 68, 38, + 1, 30, 38, 46, 46, 72, 78, 40, 14, 32, + 40, 24, 6, 100, 54, 38, 1, 38, 24, 46, + 10, 38, 18, 16, 10, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -1663,10 +1797,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 54, 28, 23, 40, 14, 1, 54, 28, 23, 40, 14, 1, 54, 28, 23, 40, 14, 1, 9, 14, 2, 2, 12, 34, 12, 34, 38, 46, 28, - 23, 46, 28, 23, 1, 66, 66, 38, 1, 30, - 38, 44, 44, 72, 78, 40, 14, 34, 40, 24, - 6, 98, 54, 38, 1, 38, 24, 44, 12, 38, - 20, 18, 12, 38, + 23, 46, 28, 23, 23, 23, 1, 66, 66, 38, + 1, 30, 38, 44, 44, 72, 78, 40, 14, 34, + 40, 24, 6, 98, 54, 38, 1, 38, 24, 44, + 12, 38, 20, 18, 12, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -1684,10 +1820,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 52, 26, 
23, 40, 14, 1, 52, 26, 23, 40, 14, 1, 52, 26, 23, 40, 14, 1, 11, 14, 2, 2, 12, 34, 12, 34, 36, 46, 26, - 23, 46, 26, 23, 1, 64, 64, 36, 1, 30, - 36, 42, 42, 72, 78, 40, 14, 34, 40, 24, - 4, 96, 52, 36, 1, 36, 24, 42, 12, 36, - 22, 18, 12, 36, + 23, 46, 26, 23, 23, 23, 1, 64, 64, 36, + 1, 30, 36, 42, 42, 72, 78, 40, 14, 34, + 40, 24, 4, 96, 52, 36, 1, 36, 24, 42, + 12, 36, 22, 18, 12, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -1705,10 +1843,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 52, 26, 21, 42, 14, 1, 52, 26, 21, 42, 14, 1, 52, 26, 21, 42, 14, 1, 11, 14, 4, 4, 14, 36, 14, 36, 36, 46, 26, - 21, 46, 26, 21, 1, 64, 64, 36, 1, 30, - 36, 42, 42, 74, 78, 42, 14, 36, 42, 26, - 4, 96, 52, 36, 1, 36, 26, 42, 14, 36, - 26, 20, 14, 36, + 21, 46, 26, 21, 21, 21, 1, 64, 64, 36, + 1, 30, 36, 42, 42, 74, 78, 42, 14, 36, + 42, 26, 4, 96, 52, 36, 1, 36, 26, 42, + 14, 36, 26, 20, 14, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + }, { @@ -1726,10 +1866,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 11, 14, 6, 6, 16, 38, 16, 38, 36, 46, 24, - 21, 46, 24, 21, 1, 62, 62, 36, 1, 30, - 36, 40, 40, 74, 78, 42, 14, 38, 42, 26, - 4, 94, 52, 36, 1, 36, 26, 40, 16, 36, - 28, 22, 16, 36, + 21, 46, 24, 21, 21, 21, 1, 62, 62, 36, + 1, 30, 36, 40, 40, 74, 78, 42, 14, 38, + 42, 26, 4, 94, 52, 36, 1, 36, 26, 40, + 16, 36, 28, 22, 16, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + }, { @@ -1747,10 +1889,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 11, 14, 6, 6, 18, 38, 18, 38, 36, 46, 24, - 21, 46, 24, 21, 1, 60, 60, 36, 1, 30, - 36, 40, 40, 74, 78, 42, 14, 38, 42, 26, - 4, 92, 52, 36, 1, 36, 26, 40, 18, 36, - 30, 22, 18, 36, + 21, 46, 24, 21, 21, 21, 1, 60, 60, 36, + 1, 30, 36, 40, 40, 74, 78, 42, 14, 
38, + 42, 26, 4, 92, 52, 36, 1, 36, 26, 40, + 18, 36, 30, 22, 18, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + }, { @@ -1768,10 +1912,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 50, 22, 19, 44, 14, 1, 50, 22, 19, 44, 14, 1, 50, 22, 19, 44, 14, 1, 13, 14, 8, 8, 20, 40, 20, 40, 34, 46, 22, - 19, 46, 22, 19, 1, 58, 58, 34, 1, 30, - 34, 38, 38, 76, 78, 44, 14, 40, 44, 28, - 2, 90, 50, 34, 1, 34, 28, 38, 20, 34, - 32, 24, 20, 34, + 19, 46, 22, 19, 19, 19, 1, 58, 58, 34, + 1, 30, 34, 38, 38, 76, 78, 44, 14, 40, + 44, 28, 2, 90, 50, 34, 1, 34, 28, 38, + 20, 34, 32, 24, 20, 34, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + }, { @@ -1789,10 +1935,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 13, 14, 10, 10, 22, 42, 22, 42, 34, 46, 20, - 19, 46, 20, 19, 1, 56, 56, 34, 1, 30, - 34, 36, 36, 76, 78, 44, 14, 42, 44, 28, - 2, 88, 50, 34, 1, 34, 28, 36, 22, 34, - 36, 26, 22, 34, + 19, 46, 20, 19, 19, 19, 1, 56, 56, 34, + 1, 30, 34, 36, 36, 76, 78, 44, 14, 42, + 44, 28, 2, 88, 50, 34, 1, 34, 28, 36, + 22, 34, 36, 26, 22, 34, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + }, { @@ -1810,10 +1958,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 13, 14, 10, 10, 24, 42, 24, 42, 34, 46, 20, - 19, 46, 20, 19, 1, 54, 54, 34, 1, 30, - 34, 36, 36, 76, 78, 44, 14, 42, 44, 28, - 2, 86, 50, 34, 1, 34, 28, 36, 24, 34, - 38, 26, 24, 34, + 19, 46, 20, 19, 19, 19, 1, 54, 54, 34, + 1, 30, 34, 36, 36, 76, 78, 44, 14, 42, + 44, 28, 2, 86, 50, 34, 1, 34, 28, 36, + 24, 34, 38, 26, 24, 34, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + }, { @@ -1831,10 +1981,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 48, 18, 17, 46, 14, 1, 48, 18, 17, 46, 14, 1, 48, 18, 17, 46, 14, 1, 15, 14, 12, 12, 26, 
44, 26, 44, 32, 46, 18, - 17, 46, 18, 17, 1, 52, 52, 32, 1, 30, - 32, 34, 34, 78, 78, 46, 14, 44, 46, 30, - 0, 84, 48, 32, 1, 32, 30, 34, 26, 32, - 40, 28, 26, 32, + 17, 46, 18, 17, 17, 17, 1, 52, 52, 32, + 1, 30, 32, 34, 34, 78, 78, 46, 14, 44, + 46, 30, 0, 84, 48, 32, 1, 32, 30, 34, + 26, 32, 40, 28, 26, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, { @@ -1852,10 +2004,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 15, 14, 12, 12, 28, 44, 28, 44, 32, 46, 16, - 17, 46, 16, 17, 1, 50, 50, 32, 1, 30, - 32, 32, 32, 78, 78, 46, 14, 44, 46, 30, - 0, 82, 48, 32, 1, 32, 30, 32, 28, 32, - 42, 28, 28, 32, + 17, 46, 16, 17, 17, 17, 1, 50, 50, 32, + 1, 30, 32, 32, 32, 78, 78, 46, 14, 44, + 46, 30, 0, 82, 48, 32, 1, 32, 30, 32, + 28, 32, 42, 28, 28, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, { @@ -1873,10 +2027,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 15, 14, 14, 14, 30, 46, 30, 46, 32, 46, 16, - 17, 46, 16, 17, 1, 48, 48, 32, 1, 30, - 32, 32, 32, 78, 78, 46, 14, 46, 46, 30, - 0, 80, 48, 32, 1, 32, 30, 32, 30, 32, - 46, 30, 30, 32, + 17, 46, 16, 17, 17, 17, 1, 48, 48, 32, + 1, 30, 32, 32, 32, 78, 78, 46, 14, 46, + 46, 30, 0, 80, 48, 32, 1, 32, 30, 32, + 30, 32, 46, 30, 30, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, { @@ -1894,10 +2050,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 17, 14, 16, 16, 32, 48, 32, 48, 30, 46, 14, - 15, 46, 14, 15, 1, 46, 46, 30, 1, 30, - 30, 30, 30, 80, 78, 48, 14, 48, 48, 32, - 1, 78, 46, 30, 1, 30, 32, 30, 32, 30, - 48, 32, 32, 30, + 15, 46, 14, 15, 15, 15, 1, 46, 46, 30, + 1, 30, 30, 30, 30, 80, 78, 48, 14, 48, + 48, 32, 1, 78, 46, 30, 1, 30, 32, 30, + 32, 30, 48, 32, 32, 30, 1, 
1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + }, { @@ -1915,10 +2073,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 17, 14, 16, 16, 34, 48, 34, 48, 30, 46, 14, - 15, 46, 14, 15, 1, 44, 44, 30, 1, 30, - 30, 30, 30, 80, 78, 48, 14, 48, 48, 32, - 1, 76, 46, 30, 1, 30, 32, 30, 34, 30, - 50, 32, 34, 30, + 15, 46, 14, 15, 15, 15, 1, 44, 44, 30, + 1, 30, 30, 30, 30, 80, 78, 48, 14, 48, + 48, 32, 1, 76, 46, 30, 1, 30, 32, 30, + 34, 30, 50, 32, 34, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + }, { @@ -1936,10 +2096,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 46, 12, 15, 48, 14, 1, 46, 12, 15, 48, 14, 1, 46, 12, 15, 48, 14, 1, 17, 14, 18, 18, 36, 50, 36, 50, 30, 46, 12, - 15, 46, 12, 15, 1, 42, 42, 30, 1, 30, - 30, 28, 28, 80, 78, 48, 14, 50, 48, 32, - 1, 74, 46, 30, 1, 30, 32, 28, 36, 30, - 52, 34, 36, 30, + 15, 46, 12, 15, 15, 15, 1, 42, 42, 30, + 1, 30, 30, 28, 28, 80, 78, 48, 14, 50, + 48, 32, 1, 74, 46, 30, 1, 30, 32, 28, + 36, 30, 52, 34, 36, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + }, { @@ -1957,10 +2119,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 19, 14, 20, 20, 38, 52, 38, 52, 28, 46, 10, - 13, 46, 10, 13, 1, 40, 40, 28, 1, 30, - 28, 26, 26, 82, 78, 50, 14, 52, 50, 34, - 3, 72, 44, 28, 1, 28, 34, 26, 38, 28, - 56, 36, 38, 28, + 13, 46, 10, 13, 13, 13, 1, 40, 40, 28, + 1, 30, 28, 26, 26, 82, 78, 50, 14, 52, + 50, 34, 3, 72, 44, 28, 1, 28, 34, 26, + 38, 28, 56, 36, 38, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -1978,10 +2142,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 19, 14, 20, 20, 40, 52, 40, 52, 28, 46, 10, - 13, 46, 10, 13, 1, 38, 38, 28, 1, 30, - 28, 
26, 26, 82, 78, 50, 14, 52, 50, 34, - 3, 70, 44, 28, 1, 28, 34, 26, 40, 28, - 58, 36, 40, 28, + 13, 46, 10, 13, 13, 13, 1, 38, 38, 28, + 1, 30, 28, 26, 26, 82, 78, 50, 14, 52, + 50, 34, 3, 70, 44, 28, 1, 28, 34, 26, + 40, 28, 58, 36, 40, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -1999,10 +2165,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 44, 8, 13, 50, 14, 1, 44, 8, 13, 50, 14, 1, 44, 8, 13, 50, 14, 1, 19, 14, 22, 22, 42, 54, 42, 54, 28, 46, 8, - 13, 46, 8, 13, 1, 36, 36, 28, 1, 30, - 28, 24, 24, 82, 78, 50, 14, 54, 50, 34, - 3, 68, 44, 28, 1, 28, 34, 24, 42, 28, - 60, 38, 42, 28, + 13, 46, 8, 13, 13, 13, 1, 36, 36, 28, + 1, 30, 28, 24, 24, 82, 78, 50, 14, 54, + 50, 34, 3, 68, 44, 28, 1, 28, 34, 24, + 42, 28, 60, 38, 42, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -2020,10 +2188,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 42, 6, 13, 50, 14, 1, 42, 6, 13, 50, 14, 1, 42, 6, 13, 50, 14, 1, 21, 14, 22, 22, 42, 54, 42, 54, 26, 46, 6, - 13, 46, 6, 13, 1, 34, 34, 26, 1, 30, - 26, 22, 22, 82, 78, 50, 14, 54, 50, 34, - 5, 66, 42, 26, 1, 26, 34, 22, 42, 26, - 62, 38, 42, 26, + 13, 46, 6, 13, 13, 13, 1, 34, 34, 26, + 1, 30, 26, 22, 22, 82, 78, 50, 14, 54, + 50, 34, 5, 66, 42, 26, 1, 26, 34, 22, + 42, 26, 62, 38, 42, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -2041,10 +2211,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 42, 6, 11, 52, 14, 1, 42, 6, 11, 52, 14, 1, 42, 6, 11, 52, 14, 1, 21, 14, 24, 24, 44, 56, 44, 56, 26, 46, 6, - 11, 46, 6, 11, 1, 34, 34, 26, 1, 30, - 26, 22, 22, 84, 78, 52, 14, 56, 52, 36, - 5, 66, 42, 26, 1, 26, 36, 22, 44, 26, - 66, 40, 44, 26, + 11, 46, 6, 11, 11, 11, 1, 34, 34, 26, + 1, 30, 26, 22, 22, 84, 78, 52, 14, 56, + 52, 36, 5, 66, 42, 26, 1, 26, 36, 22, + 44, 26, 66, 40, 44, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, + }, { @@ -2062,10 +2234,12 @@ const UWORD8 
gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 21, 14, 26, 26, 46, 58, 46, 58, 26, 46, 4, - 11, 46, 4, 11, 1, 32, 32, 26, 1, 30, - 26, 20, 20, 84, 78, 52, 14, 58, 52, 36, - 5, 64, 42, 26, 1, 26, 36, 20, 46, 26, - 68, 42, 46, 26, + 11, 46, 4, 11, 11, 11, 1, 32, 32, 26, + 1, 30, 26, 20, 20, 84, 78, 52, 14, 58, + 52, 36, 5, 64, 42, 26, 1, 26, 36, 20, + 46, 26, 68, 42, 46, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, + }, { @@ -2083,10 +2257,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 21, 14, 26, 26, 48, 58, 48, 58, 26, 46, 4, - 11, 46, 4, 11, 1, 30, 30, 26, 1, 30, - 26, 20, 20, 84, 78, 52, 14, 58, 52, 36, - 5, 62, 42, 26, 1, 26, 36, 20, 48, 26, - 70, 42, 48, 26, + 11, 46, 4, 11, 11, 11, 1, 30, 30, 26, + 1, 30, 26, 20, 20, 84, 78, 52, 14, 58, + 52, 36, 5, 62, 42, 26, 1, 26, 36, 20, + 48, 26, 70, 42, 48, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, + }, { @@ -2104,10 +2280,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 40, 2, 9, 54, 14, 1, 40, 2, 9, 54, 14, 1, 40, 2, 9, 54, 14, 1, 23, 14, 28, 28, 50, 60, 50, 60, 24, 46, 2, - 9, 46, 2, 9, 1, 28, 28, 24, 1, 30, - 24, 18, 18, 86, 78, 54, 14, 60, 54, 38, - 7, 60, 40, 24, 1, 24, 38, 18, 50, 24, - 72, 44, 50, 24, + 9, 46, 2, 9, 9, 9, 1, 28, 28, 24, + 1, 30, 24, 18, 18, 86, 78, 54, 14, 60, + 54, 38, 7, 60, 40, 24, 1, 24, 38, 18, + 50, 24, 72, 44, 50, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, + }, { @@ -2125,10 +2303,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 23, 14, 30, 30, 52, 62, 52, 62, 24, 46, 0, - 9, 46, 0, 9, 1, 26, 26, 24, 1, 30, - 24, 16, 16, 86, 78, 54, 14, 62, 54, 38, - 7, 58, 40, 24, 1, 24, 38, 16, 52, 24, - 76, 46, 52, 24, + 9, 46, 0, 9, 9, 9, 1, 26, 
26, 24, + 1, 30, 24, 16, 16, 86, 78, 54, 14, 62, + 54, 38, 7, 58, 40, 24, 1, 24, 38, 16, + 52, 24, 76, 46, 52, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, + }, { @@ -2146,10 +2326,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 23, 14, 30, 30, 54, 62, 54, 62, 24, 46, 0, - 9, 46, 0, 9, 1, 24, 24, 24, 1, 30, - 24, 16, 16, 86, 78, 54, 14, 62, 54, 38, - 7, 56, 40, 24, 1, 24, 38, 16, 54, 24, - 78, 46, 54, 24, + 9, 46, 0, 9, 9, 9, 1, 24, 24, 24, + 1, 30, 24, 16, 16, 86, 78, 54, 14, 62, + 54, 38, 7, 56, 40, 24, 1, 24, 38, 16, + 54, 24, 78, 46, 54, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, + }, { @@ -2167,10 +2349,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 38, 1, 7, 56, 14, 1, 38, 1, 7, 56, 14, 1, 38, 1, 7, 56, 14, 1, 25, 14, 32, 32, 56, 64, 56, 64, 22, 46, 1, - 7, 46, 1, 7, 1, 22, 22, 22, 1, 30, - 22, 14, 14, 88, 78, 56, 14, 64, 56, 40, - 9, 54, 38, 22, 1, 22, 40, 14, 56, 22, - 80, 48, 56, 22, + 7, 46, 1, 7, 7, 7, 1, 22, 22, 22, + 1, 30, 22, 14, 14, 88, 78, 56, 14, 64, + 56, 40, 9, 54, 38, 22, 1, 22, 40, 14, + 56, 22, 80, 48, 56, 22, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, + }, { @@ -2188,10 +2372,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 25, 14, 32, 32, 58, 64, 58, 64, 22, 46, 3, - 7, 46, 3, 7, 1, 20, 20, 22, 1, 30, - 22, 12, 12, 88, 78, 56, 14, 64, 56, 40, - 9, 52, 38, 22, 1, 22, 40, 12, 58, 22, - 82, 48, 58, 22, + 7, 46, 3, 7, 7, 7, 1, 20, 20, 22, + 1, 30, 22, 12, 12, 88, 78, 56, 14, 64, + 56, 40, 9, 52, 38, 22, 1, 22, 40, 12, + 58, 22, 82, 48, 58, 22, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, + }, { @@ -2209,10 +2395,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 25, 14, 34, 34, 60, 66, 
60, 66, 22, 46, 3, - 7, 46, 3, 7, 1, 18, 18, 22, 1, 30, - 22, 12, 12, 88, 78, 56, 14, 66, 56, 40, - 9, 50, 38, 22, 1, 22, 40, 12, 60, 22, - 86, 50, 60, 22, + 7, 46, 3, 7, 7, 7, 1, 18, 18, 22, + 1, 30, 22, 12, 12, 88, 78, 56, 14, 66, + 56, 40, 9, 50, 38, 22, 1, 22, 40, 12, + 60, 22, 86, 50, 60, 22, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, + }, { @@ -2230,10 +2418,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 27, 14, 36, 36, 62, 68, 62, 68, 20, 46, 5, - 5, 46, 5, 5, 1, 16, 16, 20, 1, 30, - 20, 10, 10, 90, 78, 58, 14, 68, 58, 42, - 11, 48, 36, 20, 1, 20, 42, 10, 62, 20, - 88, 52, 62, 20, + 5, 46, 5, 5, 5, 5, 1, 16, 16, 20, + 1, 30, 20, 10, 10, 90, 78, 58, 14, 68, + 58, 42, 11, 48, 36, 20, 1, 20, 42, 10, + 62, 20, 88, 52, 62, 20, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, + }, { @@ -2251,10 +2441,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 27, 14, 36, 36, 64, 68, 64, 68, 20, 46, 5, - 5, 46, 5, 5, 1, 14, 14, 20, 1, 30, - 20, 10, 10, 90, 78, 58, 14, 68, 58, 42, - 11, 46, 36, 20, 1, 20, 42, 10, 64, 20, - 90, 52, 64, 20, + 5, 46, 5, 5, 5, 5, 1, 14, 14, 20, + 1, 30, 20, 10, 10, 90, 78, 58, 14, 68, + 58, 42, 11, 46, 36, 20, 1, 20, 42, 10, + 64, 20, 90, 52, 64, 20, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, + }, { @@ -2272,10 +2464,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 36, 7, 5, 58, 14, 1, 36, 7, 5, 58, 14, 1, 36, 7, 5, 58, 14, 1, 27, 14, 38, 38, 66, 70, 66, 70, 20, 46, 7, - 5, 46, 7, 5, 1, 12, 12, 20, 1, 30, - 20, 8, 8, 90, 78, 58, 14, 70, 58, 42, - 11, 44, 36, 20, 1, 20, 42, 8, 66, 20, - 92, 54, 66, 20, + 5, 46, 7, 5, 5, 5, 1, 12, 12, 20, + 1, 30, 20, 8, 8, 90, 78, 58, 14, 70, + 58, 42, 11, 44, 36, 20, 1, 20, 42, 8, + 66, 20, 92, 54, 66, 20, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, + }, { 
@@ -2293,10 +2487,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 29, 14, 40, 40, 68, 72, 68, 72, 18, 46, 9, - 3, 46, 9, 3, 1, 10, 10, 18, 1, 30, - 18, 6, 6, 92, 78, 60, 14, 72, 60, 44, - 13, 42, 34, 18, 1, 18, 44, 6, 68, 18, - 96, 56, 68, 18, + 3, 46, 9, 3, 3, 3, 1, 10, 10, 18, + 1, 30, 18, 6, 6, 92, 78, 60, 14, 72, + 60, 44, 13, 42, 34, 18, 1, 18, 44, 6, + 68, 18, 96, 56, 68, 18, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -2314,10 +2510,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 29, 14, 40, 40, 70, 72, 70, 72, 18, 46, 9, - 3, 46, 9, 3, 1, 8, 8, 18, 1, 30, - 18, 6, 6, 92, 78, 60, 14, 72, 60, 44, - 13, 40, 34, 18, 1, 18, 44, 6, 70, 18, - 98, 56, 70, 18, + 3, 46, 9, 3, 3, 3, 1, 8, 8, 18, + 1, 30, 18, 6, 6, 92, 78, 60, 14, 72, + 60, 44, 13, 40, 34, 18, 1, 18, 44, 6, + 70, 18, 98, 56, 70, 18, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -2335,10 +2533,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 34, 11, 3, 60, 14, 1, 34, 11, 3, 60, 14, 1, 34, 11, 3, 60, 14, 1, 29, 14, 42, 42, 72, 74, 72, 74, 18, 46, 11, - 3, 46, 11, 3, 1, 6, 6, 18, 1, 30, - 18, 4, 4, 92, 78, 60, 14, 74, 60, 44, - 13, 38, 34, 18, 1, 18, 44, 4, 72, 18, - 100, 58, 72, 18, + 3, 46, 11, 3, 3, 3, 1, 6, 6, 18, + 1, 30, 18, 4, 4, 92, 78, 60, 14, 74, + 60, 44, 13, 38, 34, 18, 1, 18, 44, 4, + 72, 18, 100, 58, 72, 18, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -2356,10 +2556,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 32, 13, 3, 60, 14, 1, 32, 13, 3, 60, 14, 1, 32, 13, 3, 60, 14, 1, 31, 14, 42, 42, 72, 74, 72, 74, 16, 46, 13, - 3, 46, 13, 3, 1, 4, 4, 16, 1, 30, - 16, 2, 2, 92, 78, 60, 14, 74, 60, 44, - 15, 36, 32, 16, 1, 16, 44, 2, 72, 16, - 102, 58, 72, 16, + 3, 
46, 13, 3, 3, 3, 1, 4, 4, 16, + 1, 30, 16, 2, 2, 92, 78, 60, 14, 74, + 60, 44, 15, 36, 32, 16, 1, 16, 44, 2, + 72, 16, 102, 58, 72, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -2377,10 +2579,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 32, 13, 1, 62, 14, 1, 32, 13, 1, 62, 14, 1, 32, 13, 1, 62, 14, 1, 31, 14, 44, 44, 74, 76, 74, 76, 16, 46, 13, - 1, 46, 13, 1, 1, 4, 4, 16, 1, 30, - 16, 2, 2, 94, 78, 62, 14, 76, 62, 46, - 15, 36, 32, 16, 1, 16, 46, 2, 74, 16, - 106, 60, 74, 16, + 1, 46, 13, 1, 1, 1, 1, 4, 4, 16, + 1, 30, 16, 2, 2, 94, 78, 62, 14, 76, + 62, 46, 15, 36, 32, 16, 1, 16, 46, 2, + 74, 16, 106, 60, 74, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, + }, { @@ -2398,10 +2602,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 31, 14, 46, 46, 76, 78, 76, 78, 16, 46, 15, - 1, 46, 15, 1, 1, 2, 2, 16, 1, 30, - 16, 0, 0, 94, 78, 62, 14, 78, 62, 46, - 15, 34, 32, 16, 1, 16, 46, 0, 76, 16, - 108, 62, 76, 16, + 1, 46, 15, 1, 1, 1, 1, 2, 2, 16, + 1, 30, 16, 0, 0, 94, 78, 62, 14, 78, + 62, 46, 15, 34, 32, 16, 1, 16, 46, 0, + 76, 16, 108, 62, 76, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, + }, { @@ -2419,10 +2625,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 31, 14, 46, 46, 78, 78, 78, 78, 16, 46, 15, - 1, 46, 15, 1, 1, 0, 0, 16, 1, 30, - 16, 0, 0, 94, 78, 62, 14, 78, 62, 46, - 15, 32, 32, 16, 1, 16, 46, 0, 78, 16, - 110, 62, 78, 16, + 1, 46, 15, 1, 1, 1, 1, 0, 0, 16, + 1, 30, 16, 0, 0, 94, 78, 62, 14, 78, + 62, 46, 15, 32, 32, 16, 1, 16, 46, 0, + 78, 16, 110, 62, 78, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, + }, }, @@ -2443,10 +2651,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 33, 62, 46, 33, 30, 14, 1, 62, 46, 33, 30, 14, 1, 
62, 46, 33, 30, 14, 1, 1, 14, 1, 1, 1, 14, 1, 14, 46, 46, 46, - 33, 46, 46, 33, 1, 94, 46, 46, 1, 30, - 46, 62, 62, 62, 78, 30, 14, 14, 30, 1, - 14, 124, 62, 46, 1, 30, 46, 62, 17, 46, - 17, 17, 17, 46, + 33, 46, 46, 33, 33, 33, 1, 94, 46, 46, + 1, 30, 46, 62, 62, 62, 78, 30, 14, 14, + 30, 1, 14, 124, 62, 46, 1, 30, 46, 62, + 17, 46, 17, 17, 17, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 17, 17, 17, 17, + }, { @@ -2464,10 +2674,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 31, 62, 46, 31, 32, 14, 1, 62, 46, 31, 32, 14, 1, 62, 46, 31, 32, 14, 1, 1, 14, 0, 0, 0, 16, 0, 16, 46, 46, 46, - 31, 46, 46, 31, 1, 94, 46, 46, 1, 30, - 46, 62, 62, 64, 78, 32, 14, 16, 32, 0, - 14, 124, 62, 46, 1, 30, 46, 62, 15, 46, - 13, 15, 15, 46, + 31, 46, 46, 31, 31, 31, 1, 94, 46, 46, + 1, 30, 46, 62, 62, 64, 78, 32, 14, 16, + 32, 0, 14, 124, 62, 46, 1, 30, 46, 62, + 15, 46, 13, 15, 15, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, + }, { @@ -2485,10 +2697,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 29, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 1, 14, 0, 0, 2, 18, 2, 18, 46, 46, 44, - 31, 46, 44, 31, 1, 92, 46, 46, 1, 30, - 46, 60, 60, 64, 78, 32, 14, 18, 32, 2, - 14, 124, 62, 46, 1, 30, 46, 60, 13, 46, - 11, 13, 13, 46, + 31, 46, 44, 31, 31, 31, 1, 92, 46, 46, + 1, 30, 46, 60, 60, 64, 78, 32, 14, 18, + 32, 2, 14, 124, 62, 46, 1, 30, 46, 60, + 13, 46, 11, 13, 13, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, + }, { @@ -2506,10 +2720,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 29, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 62, 44, 31, 32, 14, 1, 1, 14, 0, 0, 2, 18, 2, 18, 46, 46, 44, - 31, 46, 44, 31, 1, 90, 46, 46, 1, 30, - 46, 60, 60, 64, 78, 32, 14, 18, 32, 2, - 14, 124, 62, 46, 1, 30, 46, 60, 11, 46, - 9, 11, 11, 46, + 31, 46, 44, 31, 31, 31, 1, 90, 46, 46, + 1, 30, 46, 60, 60, 64, 78, 32, 14, 18, + 32, 2, 14, 124, 62, 46, 1, 
30, 46, 60, + 11, 46, 9, 11, 11, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 15, 15, 15, 15, + }, { @@ -2527,10 +2743,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 27, 60, 42, 29, 34, 14, 1, 60, 42, 29, 34, 14, 1, 60, 42, 29, 34, 14, 1, 3, 14, 2, 2, 4, 20, 4, 20, 44, 46, 42, - 29, 46, 42, 29, 1, 88, 44, 44, 1, 30, - 44, 58, 58, 66, 78, 34, 14, 20, 34, 4, - 12, 124, 60, 44, 1, 30, 44, 58, 9, 44, - 7, 9, 9, 44, + 29, 46, 42, 29, 29, 29, 1, 88, 44, 44, + 1, 30, 44, 58, 58, 66, 78, 34, 14, 20, + 34, 4, 12, 124, 60, 44, 1, 30, 44, 58, + 9, 44, 7, 9, 9, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, + }, { @@ -2548,10 +2766,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 25, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 3, 14, 2, 2, 6, 22, 6, 22, 44, 46, 40, - 29, 46, 40, 29, 1, 86, 44, 44, 1, 30, - 44, 56, 56, 66, 78, 34, 14, 22, 34, 6, - 12, 124, 60, 44, 1, 30, 44, 56, 7, 44, - 3, 7, 7, 44, + 29, 46, 40, 29, 29, 29, 1, 86, 44, 44, + 1, 30, 44, 56, 56, 66, 78, 34, 14, 22, + 34, 6, 12, 124, 60, 44, 1, 30, 44, 56, + 7, 44, 3, 7, 7, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, + }, { @@ -2569,10 +2789,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 25, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 60, 40, 29, 34, 14, 1, 3, 14, 2, 2, 6, 22, 6, 22, 44, 46, 40, - 29, 46, 40, 29, 1, 84, 44, 44, 1, 30, - 44, 56, 56, 66, 78, 34, 14, 22, 34, 6, - 12, 124, 60, 44, 1, 30, 44, 56, 5, 44, - 1, 5, 5, 44, + 29, 46, 40, 29, 29, 29, 1, 84, 44, 44, + 1, 30, 44, 56, 56, 66, 78, 34, 14, 22, + 34, 6, 12, 124, 60, 44, 1, 30, 44, 56, + 5, 44, 1, 5, 5, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 13, 13, 13, 13, + }, { @@ -2590,10 +2812,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 23, 58, 38, 27, 36, 14, 1, 58, 38, 27, 36, 14, 1, 58, 38, 27, 36, 14, 1, 5, 14, 4, 4, 8, 24, 8, 24, 42, 46, 38, - 27, 46, 38, 27, 1, 82, 42, 
42, 1, 30, - 42, 54, 54, 68, 78, 36, 14, 24, 36, 8, - 10, 124, 58, 42, 1, 30, 42, 54, 3, 42, - 0, 3, 3, 42, + 27, 46, 38, 27, 27, 27, 1, 82, 42, 42, + 1, 30, 42, 54, 54, 68, 78, 36, 14, 24, + 36, 8, 10, 124, 58, 42, 1, 30, 42, 54, + 3, 42, 0, 3, 3, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 11, 11, 11, 11, + }, { @@ -2611,10 +2835,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 23, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 5, 14, 4, 4, 8, 24, 8, 24, 42, 46, 36, - 27, 46, 36, 27, 1, 80, 42, 42, 1, 30, - 42, 52, 52, 68, 78, 36, 14, 24, 36, 8, - 10, 124, 58, 42, 1, 30, 42, 52, 1, 42, - 2, 1, 1, 42, + 27, 46, 36, 27, 27, 27, 1, 80, 42, 42, + 1, 30, 42, 52, 52, 68, 78, 36, 14, 24, + 36, 8, 10, 124, 58, 42, 1, 30, 42, 52, + 1, 42, 2, 1, 1, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 11, 11, 11, 11, + }, { @@ -2632,10 +2858,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 21, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 58, 36, 27, 36, 14, 1, 5, 14, 4, 4, 10, 26, 10, 26, 42, 46, 36, - 27, 46, 36, 27, 1, 78, 42, 42, 1, 30, - 42, 52, 52, 68, 78, 36, 14, 26, 36, 10, - 10, 124, 58, 42, 1, 30, 42, 52, 0, 42, - 6, 0, 0, 42, + 27, 46, 36, 27, 27, 27, 1, 78, 42, 42, + 1, 30, 42, 52, 52, 68, 78, 36, 14, 26, + 36, 10, 10, 124, 58, 42, 1, 30, 42, 52, + 0, 42, 6, 0, 0, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 11, 11, 11, 11, + }, { @@ -2653,10 +2881,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 19, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 7, 14, 6, 6, 12, 28, 12, 28, 40, 46, 34, - 25, 46, 34, 25, 1, 76, 40, 40, 1, 30, - 40, 50, 50, 70, 78, 38, 14, 28, 38, 12, - 8, 124, 56, 40, 1, 30, 40, 50, 2, 40, - 8, 2, 2, 40, + 25, 46, 34, 25, 25, 25, 1, 76, 40, 40, + 1, 30, 40, 50, 50, 70, 78, 38, 14, 28, + 38, 12, 8, 124, 56, 40, 1, 30, 40, 50, + 2, 40, 8, 2, 2, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, + }, { @@ -2674,10 +2904,12 @@ 
const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 19, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 56, 34, 25, 38, 14, 1, 7, 14, 6, 6, 12, 28, 12, 28, 40, 46, 34, - 25, 46, 34, 25, 1, 74, 40, 40, 1, 30, - 40, 50, 50, 70, 78, 38, 14, 28, 38, 12, - 8, 124, 56, 40, 1, 30, 40, 50, 4, 40, - 10, 4, 4, 40, + 25, 46, 34, 25, 25, 25, 1, 74, 40, 40, + 1, 30, 40, 50, 50, 70, 78, 38, 14, 28, + 38, 12, 8, 124, 56, 40, 1, 30, 40, 50, + 4, 40, 10, 4, 4, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, + }, { @@ -2695,10 +2927,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 17, 56, 32, 25, 38, 14, 1, 56, 32, 25, 38, 14, 1, 56, 32, 25, 38, 14, 1, 7, 14, 6, 6, 14, 30, 14, 30, 40, 46, 32, - 25, 46, 32, 25, 1, 72, 40, 40, 1, 30, - 40, 48, 48, 70, 78, 38, 14, 30, 38, 14, - 8, 124, 56, 40, 1, 30, 40, 48, 6, 40, - 12, 6, 6, 40, + 25, 46, 32, 25, 25, 25, 1, 72, 40, 40, + 1, 30, 40, 48, 48, 70, 78, 38, 14, 30, + 38, 14, 8, 124, 56, 40, 1, 30, 40, 48, + 6, 40, 12, 6, 6, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, + }, { @@ -2716,10 +2950,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 15, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 9, 14, 8, 8, 16, 32, 16, 32, 38, 46, 30, - 23, 46, 30, 23, 1, 70, 38, 38, 1, 30, - 38, 46, 46, 72, 78, 40, 14, 32, 40, 16, - 6, 124, 54, 38, 1, 30, 38, 46, 8, 38, - 16, 8, 8, 38, + 23, 46, 30, 23, 23, 23, 1, 70, 38, 38, + 1, 30, 38, 46, 46, 72, 78, 40, 14, 32, + 40, 16, 6, 124, 54, 38, 1, 30, 38, 46, + 8, 38, 16, 8, 8, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -2737,10 +2973,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 15, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 54, 30, 23, 40, 14, 1, 9, 14, 8, 8, 16, 32, 16, 32, 38, 46, 30, - 23, 46, 30, 23, 1, 68, 38, 38, 1, 30, - 38, 46, 46, 72, 78, 40, 14, 32, 40, 16, - 6, 124, 54, 38, 1, 30, 38, 46, 10, 38, - 18, 10, 10, 38, + 
23, 46, 30, 23, 23, 23, 1, 68, 38, 38, + 1, 30, 38, 46, 46, 72, 78, 40, 14, 32, + 40, 16, 6, 124, 54, 38, 1, 30, 38, 46, + 10, 38, 18, 10, 10, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -2758,10 +2996,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 13, 54, 28, 23, 40, 14, 1, 54, 28, 23, 40, 14, 1, 54, 28, 23, 40, 14, 1, 9, 14, 8, 8, 18, 34, 18, 34, 38, 46, 28, - 23, 46, 28, 23, 1, 66, 38, 38, 1, 30, - 38, 44, 44, 72, 78, 40, 14, 34, 40, 18, - 6, 122, 54, 38, 1, 30, 38, 44, 12, 38, - 20, 12, 12, 38, + 23, 46, 28, 23, 23, 23, 1, 66, 38, 38, + 1, 30, 38, 44, 44, 72, 78, 40, 14, 34, + 40, 18, 6, 122, 54, 38, 1, 30, 38, 44, + 12, 38, 20, 12, 12, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -2779,10 +3019,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 13, 52, 26, 23, 40, 14, 1, 52, 26, 23, 40, 14, 1, 52, 26, 23, 40, 14, 1, 11, 14, 8, 8, 18, 34, 18, 34, 36, 46, 26, - 23, 46, 26, 23, 1, 64, 36, 36, 1, 30, - 36, 42, 42, 72, 78, 40, 14, 34, 40, 18, - 4, 118, 52, 36, 1, 30, 36, 42, 12, 36, - 22, 12, 12, 36, + 23, 46, 26, 23, 23, 23, 1, 64, 36, 36, + 1, 30, 36, 42, 42, 72, 78, 40, 14, 34, + 40, 18, 4, 118, 52, 36, 1, 30, 36, 42, + 12, 36, 22, 12, 12, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, + }, { @@ -2800,10 +3042,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 11, 52, 26, 21, 42, 14, 1, 52, 26, 21, 42, 14, 1, 52, 26, 21, 42, 14, 1, 11, 14, 10, 10, 20, 36, 20, 36, 36, 46, 26, - 21, 46, 26, 21, 1, 64, 36, 36, 1, 30, - 36, 42, 42, 74, 78, 42, 14, 36, 42, 20, - 4, 116, 52, 36, 1, 30, 36, 42, 14, 36, - 26, 14, 14, 36, + 21, 46, 26, 21, 21, 21, 1, 64, 36, 36, + 1, 30, 36, 42, 42, 74, 78, 42, 14, 36, + 42, 20, 4, 116, 52, 36, 1, 30, 36, 42, + 14, 36, 26, 14, 14, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + }, { @@ -2821,10 +3065,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 9, 52, 24, 
21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 11, 14, 10, 10, 22, 38, 22, 38, 36, 46, 24, - 21, 46, 24, 21, 1, 62, 36, 36, 1, 30, - 36, 40, 40, 74, 78, 42, 14, 38, 42, 22, - 4, 114, 52, 36, 1, 30, 36, 40, 16, 36, - 28, 16, 16, 36, + 21, 46, 24, 21, 21, 21, 1, 62, 36, 36, + 1, 30, 36, 40, 40, 74, 78, 42, 14, 38, + 42, 22, 4, 114, 52, 36, 1, 30, 36, 40, + 16, 36, 28, 16, 16, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + }, { @@ -2842,10 +3088,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 9, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 52, 24, 21, 42, 14, 1, 11, 14, 10, 10, 22, 38, 22, 38, 36, 46, 24, - 21, 46, 24, 21, 1, 60, 36, 36, 1, 30, - 36, 40, 40, 74, 78, 42, 14, 38, 42, 22, - 4, 112, 52, 36, 1, 30, 36, 40, 18, 36, - 30, 18, 18, 36, + 21, 46, 24, 21, 21, 21, 1, 60, 36, 36, + 1, 30, 36, 40, 40, 74, 78, 42, 14, 38, + 42, 22, 4, 112, 52, 36, 1, 30, 36, 40, + 18, 36, 30, 18, 18, 36, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + }, { @@ -2863,10 +3111,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 7, 50, 22, 19, 44, 14, 1, 50, 22, 19, 44, 14, 1, 50, 22, 19, 44, 14, 1, 13, 14, 12, 12, 24, 40, 24, 40, 34, 46, 22, - 19, 46, 22, 19, 1, 58, 34, 34, 1, 30, - 34, 38, 38, 76, 78, 44, 14, 40, 44, 24, - 2, 108, 50, 34, 1, 30, 34, 38, 20, 34, - 32, 20, 20, 34, + 19, 46, 22, 19, 19, 19, 1, 58, 34, 34, + 1, 30, 34, 38, 38, 76, 78, 44, 14, 40, + 44, 24, 2, 108, 50, 34, 1, 30, 34, 38, + 20, 34, 32, 20, 20, 34, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + }, { @@ -2884,10 +3134,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 5, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 13, 14, 12, 12, 26, 42, 26, 42, 34, 46, 20, - 19, 46, 20, 19, 1, 56, 34, 34, 1, 30, - 34, 36, 36, 76, 78, 44, 14, 42, 44, 26, - 2, 106, 50, 34, 1, 30, 34, 36, 22, 34, - 36, 22, 22, 34, + 19, 46, 20, 19, 19, 19, 1, 56, 34, 34, + 1, 30, 34, 36, 36, 76, 
78, 44, 14, 42, + 44, 26, 2, 106, 50, 34, 1, 30, 34, 36, + 22, 34, 36, 22, 22, 34, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + }, { @@ -2905,10 +3157,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 5, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 50, 20, 19, 44, 14, 1, 13, 14, 12, 12, 26, 42, 26, 42, 34, 46, 20, - 19, 46, 20, 19, 1, 54, 34, 34, 1, 30, - 34, 36, 36, 76, 78, 44, 14, 42, 44, 26, - 2, 104, 50, 34, 1, 30, 34, 36, 24, 34, - 38, 24, 24, 34, + 19, 46, 20, 19, 19, 19, 1, 54, 34, 34, + 1, 30, 34, 36, 36, 76, 78, 44, 14, 42, + 44, 26, 2, 104, 50, 34, 1, 30, 34, 36, + 24, 34, 38, 24, 24, 34, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + }, { @@ -2926,10 +3180,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 3, 48, 18, 17, 46, 14, 1, 48, 18, 17, 46, 14, 1, 48, 18, 17, 46, 14, 1, 15, 14, 14, 14, 28, 44, 28, 44, 32, 46, 18, - 17, 46, 18, 17, 1, 52, 32, 32, 1, 30, - 32, 34, 34, 78, 78, 46, 14, 44, 46, 28, - 0, 102, 48, 32, 1, 30, 32, 34, 26, 32, - 40, 26, 26, 32, + 17, 46, 18, 17, 17, 17, 1, 52, 32, 32, + 1, 30, 32, 34, 34, 78, 78, 46, 14, 44, + 46, 28, 0, 102, 48, 32, 1, 30, 32, 34, + 26, 32, 40, 26, 26, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, { @@ -2947,10 +3203,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 3, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 15, 14, 14, 14, 28, 44, 28, 44, 32, 46, 16, - 17, 46, 16, 17, 1, 50, 32, 32, 1, 30, - 32, 32, 32, 78, 78, 46, 14, 44, 46, 28, - 0, 98, 48, 32, 1, 30, 32, 32, 28, 32, - 42, 28, 28, 32, + 17, 46, 16, 17, 17, 17, 1, 50, 32, 32, + 1, 30, 32, 32, 32, 78, 78, 46, 14, 44, + 46, 28, 0, 98, 48, 32, 1, 30, 32, 32, + 28, 32, 42, 28, 28, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, { @@ -2968,10 +3226,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 1, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 48, 16, 17, 46, 14, 1, 15, 
14, 14, 14, 30, 46, 30, 46, 32, 46, 16, - 17, 46, 16, 17, 1, 48, 32, 32, 1, 30, - 32, 32, 32, 78, 78, 46, 14, 46, 46, 30, - 0, 96, 48, 32, 1, 30, 32, 32, 30, 32, - 46, 30, 30, 32, + 17, 46, 16, 17, 17, 17, 1, 48, 32, 32, + 1, 30, 32, 32, 32, 78, 78, 46, 14, 46, + 46, 30, 0, 96, 48, 32, 1, 30, 32, 32, + 30, 32, 46, 30, 30, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, { @@ -2989,10 +3249,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 0, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 17, 14, 16, 16, 32, 48, 32, 48, 30, 46, 14, - 15, 46, 14, 15, 1, 46, 30, 30, 1, 30, - 30, 30, 30, 80, 78, 48, 14, 48, 48, 32, - 1, 94, 46, 30, 1, 30, 30, 30, 32, 30, - 48, 32, 32, 30, + 15, 46, 14, 15, 15, 15, 1, 46, 30, 30, + 1, 30, 30, 30, 30, 80, 78, 48, 14, 48, + 48, 32, 1, 94, 46, 30, 1, 30, 30, 30, + 32, 30, 48, 32, 32, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + }, { @@ -3010,10 +3272,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 0, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 46, 14, 15, 48, 14, 1, 17, 14, 16, 16, 32, 48, 32, 48, 30, 46, 14, - 15, 46, 14, 15, 1, 44, 30, 30, 1, 30, - 30, 30, 30, 80, 78, 48, 14, 48, 48, 32, - 1, 92, 46, 30, 1, 30, 30, 30, 34, 30, - 50, 34, 34, 30, + 15, 46, 14, 15, 15, 15, 1, 44, 30, 30, + 1, 30, 30, 30, 30, 80, 78, 48, 14, 48, + 48, 32, 1, 92, 46, 30, 1, 30, 30, 30, + 34, 30, 50, 34, 34, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + }, { @@ -3031,10 +3295,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 2, 46, 12, 15, 48, 14, 1, 46, 12, 15, 48, 14, 1, 46, 12, 15, 48, 14, 1, 17, 14, 16, 16, 34, 50, 34, 50, 30, 46, 12, - 15, 46, 12, 15, 1, 42, 30, 30, 1, 30, - 30, 28, 28, 80, 78, 48, 14, 50, 48, 34, - 1, 88, 46, 30, 1, 30, 30, 28, 36, 30, - 52, 36, 36, 30, + 15, 46, 12, 15, 15, 15, 1, 42, 30, 30, + 1, 30, 30, 28, 28, 80, 78, 48, 14, 50, + 48, 34, 1, 88, 46, 30, 1, 30, 30, 28, + 36, 30, 52, 36, 
36, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + }, { @@ -3052,10 +3318,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 4, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 19, 14, 18, 18, 36, 52, 36, 52, 28, 46, 10, - 13, 46, 10, 13, 1, 40, 28, 28, 1, 30, - 28, 26, 26, 82, 78, 50, 14, 52, 50, 36, - 3, 86, 44, 28, 1, 30, 28, 26, 38, 28, - 56, 38, 38, 28, + 13, 46, 10, 13, 13, 13, 1, 40, 28, 28, + 1, 30, 28, 26, 26, 82, 78, 50, 14, 52, + 50, 36, 3, 86, 44, 28, 1, 30, 28, 26, + 38, 28, 56, 38, 38, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -3073,10 +3341,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 4, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 44, 10, 13, 50, 14, 1, 19, 14, 18, 18, 36, 52, 36, 52, 28, 46, 10, - 13, 46, 10, 13, 1, 38, 28, 28, 1, 30, - 28, 26, 26, 82, 78, 50, 14, 52, 50, 36, - 3, 84, 44, 28, 1, 30, 28, 26, 40, 28, - 58, 40, 40, 28, + 13, 46, 10, 13, 13, 13, 1, 38, 28, 28, + 1, 30, 28, 26, 26, 82, 78, 50, 14, 52, + 50, 36, 3, 84, 44, 28, 1, 30, 28, 26, + 40, 28, 58, 40, 40, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -3094,10 +3364,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 6, 44, 8, 13, 50, 14, 1, 44, 8, 13, 50, 14, 1, 44, 8, 13, 50, 14, 1, 19, 14, 18, 18, 38, 54, 38, 54, 28, 46, 8, - 13, 46, 8, 13, 1, 36, 28, 28, 1, 30, - 28, 24, 24, 82, 78, 50, 14, 54, 50, 38, - 3, 82, 44, 28, 1, 30, 28, 24, 42, 28, - 60, 42, 42, 28, + 13, 46, 8, 13, 13, 13, 1, 36, 28, 28, + 1, 30, 28, 24, 24, 82, 78, 50, 14, 54, + 50, 38, 3, 82, 44, 28, 1, 30, 28, 24, + 42, 28, 60, 42, 42, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -3115,10 +3387,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 6, 42, 6, 13, 50, 14, 1, 42, 6, 13, 50, 14, 1, 42, 6, 13, 50, 14, 1, 21, 14, 18, 18, 38, 54, 38, 54, 26, 46, 6, - 13, 46, 6, 13, 1, 34, 26, 26, 1, 30, - 26, 22, 
22, 82, 78, 50, 14, 54, 50, 38, - 5, 78, 42, 26, 1, 30, 26, 22, 42, 26, - 62, 42, 42, 26, + 13, 46, 6, 13, 13, 13, 1, 34, 26, 26, + 1, 30, 26, 22, 22, 82, 78, 50, 14, 54, + 50, 38, 5, 78, 42, 26, 1, 30, 26, 22, + 42, 26, 62, 42, 42, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + }, { @@ -3136,10 +3410,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 8, 42, 6, 11, 52, 14, 1, 42, 6, 11, 52, 14, 1, 42, 6, 11, 52, 14, 1, 21, 14, 20, 20, 40, 56, 40, 56, 26, 46, 6, - 11, 46, 6, 11, 1, 34, 26, 26, 1, 30, - 26, 22, 22, 84, 78, 52, 14, 56, 52, 40, - 5, 76, 42, 26, 1, 30, 26, 22, 44, 26, - 66, 44, 44, 26, + 11, 46, 6, 11, 11, 11, 1, 34, 26, 26, + 1, 30, 26, 22, 22, 84, 78, 52, 14, 56, + 52, 40, 5, 76, 42, 26, 1, 30, 26, 22, + 44, 26, 66, 44, 44, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, + }, { @@ -3157,10 +3433,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 10, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 21, 14, 20, 20, 42, 58, 42, 58, 26, 46, 4, - 11, 46, 4, 11, 1, 32, 26, 26, 1, 30, - 26, 20, 20, 84, 78, 52, 14, 58, 52, 42, - 5, 74, 42, 26, 1, 30, 26, 20, 46, 26, - 68, 46, 46, 26, + 11, 46, 4, 11, 11, 11, 1, 32, 26, 26, + 1, 30, 26, 20, 20, 84, 78, 52, 14, 58, + 52, 42, 5, 74, 42, 26, 1, 30, 26, 20, + 46, 26, 68, 46, 46, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, + }, { @@ -3178,10 +3456,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 10, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 42, 4, 11, 52, 14, 1, 21, 14, 20, 20, 42, 58, 42, 58, 26, 46, 4, - 11, 46, 4, 11, 1, 30, 26, 26, 1, 30, - 26, 20, 20, 84, 78, 52, 14, 58, 52, 42, - 5, 72, 42, 26, 1, 30, 26, 20, 48, 26, - 70, 48, 48, 26, + 11, 46, 4, 11, 11, 11, 1, 30, 26, 26, + 1, 30, 26, 20, 20, 84, 78, 52, 14, 58, + 52, 42, 5, 72, 42, 26, 1, 30, 26, 20, + 48, 26, 70, 48, 48, 26, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, + }, { @@ -3199,10 +3479,12 @@ const UWORD8 
gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 12, 40, 2, 9, 54, 14, 1, 40, 2, 9, 54, 14, 1, 40, 2, 9, 54, 14, 1, 23, 14, 22, 22, 44, 60, 44, 60, 24, 46, 2, - 9, 46, 2, 9, 1, 28, 24, 24, 1, 30, - 24, 18, 18, 86, 78, 54, 14, 60, 54, 44, - 7, 68, 40, 24, 1, 30, 24, 18, 50, 24, - 72, 50, 50, 24, + 9, 46, 2, 9, 9, 9, 1, 28, 24, 24, + 1, 30, 24, 18, 18, 86, 78, 54, 14, 60, + 54, 44, 7, 68, 40, 24, 1, 30, 24, 18, + 50, 24, 72, 50, 50, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, + }, { @@ -3220,10 +3502,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 23, 14, 22, 22, 46, 62, 46, 62, 24, 46, 0, - 9, 46, 0, 9, 1, 26, 24, 24, 1, 30, - 24, 16, 16, 86, 78, 54, 14, 62, 54, 46, - 7, 66, 40, 24, 1, 30, 24, 16, 52, 24, - 76, 52, 52, 24, + 9, 46, 0, 9, 9, 9, 1, 26, 24, 24, + 1, 30, 24, 16, 16, 86, 78, 54, 14, 62, + 54, 46, 7, 66, 40, 24, 1, 30, 24, 16, + 52, 24, 76, 52, 52, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, + }, { @@ -3241,10 +3525,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 14, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 40, 0, 9, 54, 14, 1, 23, 14, 22, 22, 46, 62, 46, 62, 24, 46, 0, - 9, 46, 0, 9, 1, 24, 24, 24, 1, 30, - 24, 16, 16, 86, 78, 54, 14, 62, 54, 46, - 7, 64, 40, 24, 1, 30, 24, 16, 54, 24, - 78, 54, 54, 24, + 9, 46, 0, 9, 9, 9, 1, 24, 24, 24, + 1, 30, 24, 16, 16, 86, 78, 54, 14, 62, + 54, 46, 7, 64, 40, 24, 1, 30, 24, 16, + 54, 24, 78, 54, 54, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, + }, { @@ -3262,10 +3548,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 16, 38, 1, 7, 56, 14, 1, 38, 1, 7, 56, 14, 1, 38, 1, 7, 56, 14, 1, 25, 14, 24, 24, 48, 64, 48, 64, 22, 46, 1, - 7, 46, 1, 7, 1, 22, 22, 22, 1, 30, - 22, 14, 14, 88, 78, 56, 14, 64, 56, 48, - 9, 62, 38, 22, 1, 30, 22, 14, 56, 22, - 80, 56, 56, 22, + 7, 46, 1, 7, 7, 7, 1, 22, 22, 22, + 1, 30, 
22, 14, 14, 88, 78, 56, 14, 64, + 56, 48, 9, 62, 38, 22, 1, 30, 22, 14, + 56, 22, 80, 56, 56, 22, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, + }, { @@ -3283,10 +3571,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 16, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 25, 14, 24, 24, 48, 64, 48, 64, 22, 46, 3, - 7, 46, 3, 7, 1, 20, 22, 22, 1, 30, - 22, 12, 12, 88, 78, 56, 14, 64, 56, 48, - 9, 58, 38, 22, 1, 30, 22, 12, 58, 22, - 82, 58, 58, 22, + 7, 46, 3, 7, 7, 7, 1, 20, 22, 22, + 1, 30, 22, 12, 12, 88, 78, 56, 14, 64, + 56, 48, 9, 58, 38, 22, 1, 30, 22, 12, + 58, 22, 82, 58, 58, 22, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, + }, { @@ -3304,10 +3594,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 18, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 38, 3, 7, 56, 14, 1, 25, 14, 24, 24, 50, 66, 50, 66, 22, 46, 3, - 7, 46, 3, 7, 1, 18, 22, 22, 1, 30, - 22, 12, 12, 88, 78, 56, 14, 66, 56, 50, - 9, 56, 38, 22, 1, 30, 22, 12, 60, 22, - 86, 60, 60, 22, + 7, 46, 3, 7, 7, 7, 1, 18, 22, 22, + 1, 30, 22, 12, 12, 88, 78, 56, 14, 66, + 56, 50, 9, 56, 38, 22, 1, 30, 22, 12, + 60, 22, 86, 60, 60, 22, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, + }, { @@ -3325,10 +3617,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 20, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 27, 14, 26, 26, 52, 68, 52, 68, 20, 46, 5, - 5, 46, 5, 5, 1, 16, 20, 20, 1, 30, - 20, 10, 10, 90, 78, 58, 14, 68, 58, 52, - 11, 54, 36, 20, 1, 30, 20, 10, 62, 20, - 88, 62, 62, 20, + 5, 46, 5, 5, 5, 5, 1, 16, 20, 20, + 1, 30, 20, 10, 10, 90, 78, 58, 14, 68, + 58, 52, 11, 54, 36, 20, 1, 30, 20, 10, + 62, 20, 88, 62, 62, 20, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, + }, { @@ -3346,10 +3640,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 20, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 36, 5, 5, 58, 14, 1, 27, 14, 26, 26, 52, 68, 52, 68, 
20, 46, 5, - 5, 46, 5, 5, 1, 14, 20, 20, 1, 30, - 20, 10, 10, 90, 78, 58, 14, 68, 58, 52, - 11, 52, 36, 20, 1, 30, 20, 10, 64, 20, - 90, 64, 64, 20, + 5, 46, 5, 5, 5, 5, 1, 14, 20, 20, + 1, 30, 20, 10, 10, 90, 78, 58, 14, 68, + 58, 52, 11, 52, 36, 20, 1, 30, 20, 10, + 64, 20, 90, 64, 64, 20, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, + }, { @@ -3367,10 +3663,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 22, 36, 7, 5, 58, 14, 1, 36, 7, 5, 58, 14, 1, 36, 7, 5, 58, 14, 1, 27, 14, 26, 26, 54, 70, 54, 70, 20, 46, 7, - 5, 46, 7, 5, 1, 12, 20, 20, 1, 30, - 20, 8, 8, 90, 78, 58, 14, 70, 58, 54, - 11, 48, 36, 20, 1, 30, 20, 8, 66, 20, - 92, 66, 66, 20, + 5, 46, 7, 5, 5, 5, 1, 12, 20, 20, + 1, 30, 20, 8, 8, 90, 78, 58, 14, 70, + 58, 54, 11, 48, 36, 20, 1, 30, 20, 8, + 66, 20, 92, 66, 66, 20, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, + }, { @@ -3388,10 +3686,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 24, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 29, 14, 28, 28, 56, 72, 56, 72, 18, 46, 9, - 3, 46, 9, 3, 1, 10, 18, 18, 1, 30, - 18, 6, 6, 92, 78, 60, 14, 72, 60, 56, - 13, 46, 34, 18, 1, 30, 18, 6, 68, 18, - 96, 68, 68, 18, + 3, 46, 9, 3, 3, 3, 1, 10, 18, 18, + 1, 30, 18, 6, 6, 92, 78, 60, 14, 72, + 60, 56, 13, 46, 34, 18, 1, 30, 18, 6, + 68, 18, 96, 68, 68, 18, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -3409,10 +3709,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 24, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 34, 9, 3, 60, 14, 1, 29, 14, 28, 28, 56, 72, 56, 72, 18, 46, 9, - 3, 46, 9, 3, 1, 8, 18, 18, 1, 30, - 18, 6, 6, 92, 78, 60, 14, 72, 60, 56, - 13, 44, 34, 18, 1, 30, 18, 6, 70, 18, - 98, 70, 70, 18, + 3, 46, 9, 3, 3, 3, 1, 8, 18, 18, + 1, 30, 18, 6, 6, 92, 78, 60, 14, 72, + 60, 56, 13, 44, 34, 18, 1, 30, 18, 6, + 70, 18, 98, 70, 70, 18, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -3430,10 
+3732,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 26, 34, 11, 3, 60, 14, 1, 34, 11, 3, 60, 14, 1, 34, 11, 3, 60, 14, 1, 29, 14, 28, 28, 58, 74, 58, 74, 18, 46, 11, - 3, 46, 11, 3, 1, 6, 18, 18, 1, 30, - 18, 4, 4, 92, 78, 60, 14, 74, 60, 58, - 13, 42, 34, 18, 1, 30, 18, 4, 72, 18, - 100, 72, 72, 18, + 3, 46, 11, 3, 3, 3, 1, 6, 18, 18, + 1, 30, 18, 4, 4, 92, 78, 60, 14, 74, + 60, 58, 13, 42, 34, 18, 1, 30, 18, 4, + 72, 18, 100, 72, 72, 18, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -3451,10 +3755,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 26, 32, 13, 3, 60, 14, 1, 32, 13, 3, 60, 14, 1, 32, 13, 3, 60, 14, 1, 31, 14, 28, 28, 58, 74, 58, 74, 16, 46, 13, - 3, 46, 13, 3, 1, 4, 16, 16, 1, 30, - 16, 2, 2, 92, 78, 60, 14, 74, 60, 58, - 15, 38, 32, 16, 1, 30, 16, 2, 72, 16, - 102, 72, 72, 16, + 3, 46, 13, 3, 3, 3, 1, 4, 16, 16, + 1, 30, 16, 2, 2, 92, 78, 60, 14, 74, + 60, 58, 15, 38, 32, 16, 1, 30, 16, 2, + 72, 16, 102, 72, 72, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, + }, { @@ -3472,10 +3778,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 28, 32, 13, 1, 62, 14, 1, 32, 13, 1, 62, 14, 1, 32, 13, 1, 62, 14, 1, 31, 14, 30, 30, 60, 76, 60, 76, 16, 46, 13, - 1, 46, 13, 1, 1, 4, 16, 16, 1, 30, - 16, 2, 2, 94, 78, 62, 14, 76, 62, 60, - 15, 36, 32, 16, 1, 30, 16, 2, 74, 16, - 106, 74, 74, 16, + 1, 46, 13, 1, 1, 1, 1, 4, 16, 16, + 1, 30, 16, 2, 2, 94, 78, 62, 14, 76, + 62, 60, 15, 36, 32, 16, 1, 30, 16, 2, + 74, 16, 106, 74, 74, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, + }, { @@ -3493,10 +3801,12 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 30, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 31, 14, 30, 30, 62, 78, 62, 78, 16, 46, 15, - 1, 46, 15, 1, 1, 2, 16, 16, 1, 30, - 16, 0, 0, 94, 78, 62, 14, 78, 62, 62, - 15, 34, 32, 16, 1, 30, 16, 0, 76, 16, - 108, 76, 76, 16, 
+ 1, 46, 15, 1, 1, 1, 1, 2, 16, 16, + 1, 30, 16, 0, 0, 94, 78, 62, 14, 78, + 62, 62, 15, 34, 32, 16, 1, 30, 16, 0, + 76, 16, 108, 76, 76, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, + }, { @@ -3514,10 +3824,14 @@ const UWORD8 gau1_ihevc_cab_ctxts[IHEVC_NUM_CAB_IDC][IHEVC_MAX_QP][IHEVC_CAB_CTX 30, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 32, 15, 1, 62, 14, 1, 31, 14, 30, 30, 62, 78, 62, 78, 16, 46, 15, - 1, 46, 15, 1, 1, 0, 16, 16, 1, 30, - 16, 0, 0, 94, 78, 62, 14, 78, 62, 62, - 15, 32, 32, 16, 1, 30, 16, 0, 78, 16, - 110, 78, 78, 16, + 1, 46, 15, 1, 1, 1, 1, 0, 16, 16, + 1, 30, 16, 0, 0, 94, 78, 62, 14, 78, + 62, 62, 15, 32, 32, 16, 1, 30, 16, 0, + 78, 16, 110, 78, 78, 16, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, + }, + }, + }; diff --git a/common/ihevc_cabac_tables.h b/common/ihevc_cabac_tables.h index 9ed1a2c..7feac7a 100644 --- a/common/ihevc_cabac_tables.h +++ b/common/ihevc_cabac_tables.h @@ -92,9 +92,13 @@ typedef enum IHEVC_CAB_COEFFY_PREFIX = IHEVC_CAB_COEFFX_PREFIX + 18, IHEVC_CAB_CODED_SUBLK_IDX = IHEVC_CAB_COEFFY_PREFIX + 18, IHEVC_CAB_COEFF_FLAG = IHEVC_CAB_CODED_SUBLK_IDX + 4, - IHEVC_CAB_COEFABS_GRTR1_FLAG = IHEVC_CAB_COEFF_FLAG + 42, + IHEVC_CAB_COEFABS_GRTR1_FLAG = IHEVC_CAB_COEFF_FLAG + 44, IHEVC_CAB_COEFABS_GRTR2_FLAG = IHEVC_CAB_COEFABS_GRTR1_FLAG + 24, - IHEVC_CAB_CTXT_END = IHEVC_CAB_COEFABS_GRTR2_FLAG + 6 + IHEVC_CAB_CCP_LOG2_RES_ABS = IHEVC_CAB_COEFABS_GRTR2_FLAG + 6, + IHEVC_CAB_CCP_RES_SIGN_FLAG = IHEVC_CAB_CCP_LOG2_RES_ABS + 8, + IHEVC_CAB_EXPLICIT_RDPCM_FLAG = IHEVC_CAB_CCP_RES_SIGN_FLAG + 2, + IHEVC_CAB_EXPLICIT_RDPCM_DIR = IHEVC_CAB_EXPLICIT_RDPCM_FLAG + 2, + IHEVC_CAB_CTXT_END = IHEVC_CAB_EXPLICIT_RDPCM_DIR + 2, }IHEVC_CABAC_CTXT_OFFSETS; diff --git a/common/ihevc_chroma_intra_pred.h b/common/ihevc_chroma_intra_pred.h index c4ca13b..8ffa888 100644 --- a/common/ihevc_chroma_intra_pred.h +++ b/common/ihevc_chroma_intra_pred.h @@ -126,7 +126,14 @@ typedef void ihevc_intra_pred_chroma_ref_substitution_ft(UWORD8 
*pu1_top_left, WORD32 nt, WORD32 nbr_flags, UWORD8 *pu1_dst, - WORD32 dst_strd); + WORD32 dst_strd, + WORD32 chroma_format_idc); + +typedef void ihevc_intra_pred_chroma_ref_filtering_ft(UWORD8 *pu1_src, + WORD32 nt, + UWORD8 *pu1_dst, + WORD32 mode, + WORD32 intra_smoothing_flags); typedef void ihevc_hbd_intra_pred_chroma_planar_ft( UWORD16 *pu2_ref, @@ -240,6 +247,7 @@ ihevc_intra_pred_chroma_mode_11_to_17_ft ihevc_intra_pred_chroma_mode_11_to_17; ihevc_intra_pred_chroma_mode_19_to_25_ft ihevc_intra_pred_chroma_mode_19_to_25; ihevc_intra_pred_chroma_mode_27_to_33_ft ihevc_intra_pred_chroma_mode_27_to_33; ihevc_intra_pred_chroma_ref_substitution_ft ihevc_intra_pred_chroma_ref_substitution; +ihevc_intra_pred_chroma_ref_filtering_ft ihevc_intra_pred_chroma_ref_filtering; ihevc_hbd_intra_pred_chroma_planar_ft ihevc_hbd_intra_pred_chroma_planar; ihevc_hbd_intra_pred_chroma_dc_ft ihevc_hbd_intra_pred_chroma_dc; diff --git a/common/ihevc_chroma_intra_pred_filters.c b/common/ihevc_chroma_intra_pred_filters.c index fe14d8a..8df5115 100644 --- a/common/ihevc_chroma_intra_pred_filters.c +++ b/common/ihevc_chroma_intra_pred_filters.c @@ -64,6 +64,7 @@ #include "ihevc_typedefs.h" #include "ihevc_macros.h" +#include "ihevc_defs.h" #include "ihevc_func_selector.h" #include "ihevc_platform_macros.h" #include "ihevc_intra_pred.h" @@ -129,7 +130,8 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, WORD32 nt, WORD32 nbr_flags, UWORD8 *pu1_dst, - WORD32 dst_strd) + WORD32 dst_strd, + WORD32 chroma_format_idc) { UWORD8 pu1_ref_u, pu1_ref_v; WORD32 dc_val, i, j; @@ -206,7 +208,7 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, // U-V interleaved Top-top right samples } - if(nt == 4) + if(nt == 4 || (nt == 8 && chroma_format_idc == CHROMA_FMT_IDC_YUV444)) { /* 1 bit extraction for all the neighboring blocks */ tp_left = (nbr_flags & 0x10000) >> 16; @@ -274,8 +276,9 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, } } - else 
if(nt == 8) + else if(nt == 8 || (nt == 16 && chroma_format_idc == CHROMA_FMT_IDC_YUV444)) { + WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1; WORD32 nbr_flags_temp = 0; nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4) + ((nbr_flags & 0x300) >> 4) @@ -285,16 +288,16 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/ /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */ { - nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */ - if(nbr_id_from_bl == 32) - nbr_id_from_bl = 16; - if(nbr_id_from_bl == 16) + nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * (4 * sub_sample); /* for bottom left and left */ + if(nbr_id_from_bl == 32 * sub_sample) + nbr_id_from_bl = 16 * sub_sample; + if(nbr_id_from_bl == 16 * sub_sample) { /* for top left : 1 pel per nbr bit */ if(!((nbr_flags_temp >> 8) & 0x1)) { nbr_id_from_bl++; - nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right; 8 pels per nbr bit */ + nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4 * sub_sample; /* top and top right; 8 pels per nbr bit */ } } @@ -313,14 +316,14 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, } /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */ - while(nbr_id_from_bl < ((T8C_4NT)+1)) + while(nbr_id_from_bl < ((T8C_4NT * sub_sample)+1)) { /* To Obtain the next unavailable idx flag after reverse neighbor substitution */ /* Divide by 8 to obtain the original index */ - frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/ + frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 
3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/ /* The Top-left flag is at the last bit location of nbr_flags*/ - if(nbr_id_from_bl == (T8C_4NT / 2)) + if(nbr_id_from_bl == (T8C_4NT * sub_sample / 2)) { get_bits = GET_BIT(nbr_flags_temp, 8); @@ -339,22 +342,23 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, UWORD16 *pu2_dst; /* 8 pel substitution (other than TL) */ pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2]; - ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4); + ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample); } } - nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4; + nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT * sub_sample / 2)) ? 1 : 4 * sub_sample; } } - else if(nt == 16) + else if(nt == 16 || (nt == 32 && chroma_format_idc == CHROMA_FMT_IDC_YUV444)) { + WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1; /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/ /* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */ { - nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */ + nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4 * sub_sample; /* for bottom left and left */ - if(nbr_id_from_bl == 32) + if(nbr_id_from_bl == 32 * sub_sample) { /* for top left : 1 pel per nbr bit */ if(!((nbr_flags >> 16) & 0x1)) @@ -362,7 +366,7 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, /* top left not available */ nbr_id_from_bl++; /* top and top right; 4 pels per nbr bit */ - nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4; + nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4 * sub_sample; } } /* Reverse Substitution Process*/ @@ -380,14 +384,14 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, } /* for the loop of 
4*Nt+1 pixels (excluding pixels computed from reverse substitution) */ - while(nbr_id_from_bl < ((T16C_4NT)+1)) + while(nbr_id_from_bl < ((T16C_4NT * sub_sample)+1)) { /* To Obtain the next unavailable idx flag after reverse neighbor substitution */ /* Devide by 4 to obtain the original index */ - frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/ + frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/ /* The Top-left flag is at the last bit location of nbr_flags*/ - if(nbr_id_from_bl == (T16C_4NT / 2)) + if(nbr_id_from_bl == (T16C_4NT * sub_sample / 2)) { get_bits = GET_BIT(nbr_flags, 16); /* only pel substitution for TL */ @@ -405,17 +409,110 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left, UWORD16 *pu2_dst; /* 4 pel substitution (other than TL) */ pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2]; - ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4); + ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample); } } - nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4; + nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT * sub_sample / 2)) ? 
1 : 4 * sub_sample; } } } } +/** +******************************************************************************* +* +* @brief +* Intra prediction interpolation filter for chroma ref_filtering (4:4:4) +* +* +* @par Description: +* Reference DC filtering for neighboring chroma samples dependent on TU size and +* mode Refer to section 8.4.4.2.3 in the standard +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] nt +* integer Transform Block size +* +* @param[in] mode +* integer intraprediction mode +* +* @param[in] strong_intra_smoothing_enable_flag +* integer containing intra_smoothing_disabled_flag and strong_smoothing_enable_flag +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void ihevc_intra_pred_chroma_ref_filtering(UWORD8 *pu1_src, + WORD32 nt, + UWORD8 *pu1_dst, + WORD32 mode, + WORD32 intra_smoothing_flag) +{ + WORD32 filter_flag; + WORD32 i; /* Generic indexing variable */ + WORD32 four_nt = 4 * nt; + UWORD8 au1_flt[((4 * MAX_CU_SIZE) + 1) * 2]; + WORD32 intra_smoothing_disabled_flag = (intra_smoothing_flag >> 3) & 0x1; + WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flag & 0x1; + UNUSED(strong_intra_smoothing_enable_flag); + + if(intra_smoothing_disabled_flag) + { + if(pu1_src == pu1_dst) return; + for(i = 0; i < (2 * (four_nt + 1)); i += 2) + { + pu1_dst[i] = pu1_src[i]; + pu1_dst[i + 1] = pu1_src[i + 1]; + } + return; + } + + filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)); + if(0 == filter_flag) + { + if(pu1_src == pu1_dst) return; + for(i = 0; i < (2 * (four_nt + 1)); i += 2) + { + pu1_dst[i] = pu1_src[i]; + pu1_dst[i + 1] = pu1_src[i + 1]; + } + } + else + { + /* Extremities Untouched*/ + au1_flt[0] = pu1_src[0]; + au1_flt[1] = pu1_src[1]; + au1_flt[four_nt * 2] = pu1_src[four_nt * 2]; + au1_flt[(four_nt * 2) + 1] = pu1_src[(four_nt * 2) + 1]; + + for(i 
= 2; i < four_nt * 2; i += 2) + { + au1_flt[i] = (pu1_src[i - 2] + 2 * pu1_src[i] + pu1_src[i + 2] + 2) >> 2; + au1_flt[i + 1] = (pu1_src[i - 1] + 2 * pu1_src[i + 1] + pu1_src[i + 3] + 2) >> 2; + } + + for(i = 0; i < (2 * (four_nt + 1)); i += 2) + { + pu1_dst[i] = au1_flt[i]; + pu1_dst[i + 1] = au1_flt[i + 1]; + } + } +} + + + /** ******************************************************************************* * diff --git a/common/ihevc_chroma_itrans_recon.h b/common/ihevc_chroma_itrans_recon.h index 7d4958b..cbb2c7a 100644 --- a/common/ihevc_chroma_itrans_recon.h +++ b/common/ihevc_chroma_itrans_recon.h @@ -94,6 +94,15 @@ typedef void ihevc_hbd_chroma_itrans_recon_16x16_ft(WORD16 *pi2_src, WORD32 zero_cols, WORD32 zero_rows, UWORD8 bit_depth); +typedef void ihevc_chroma_itrans_recon_32x32_ft(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); ihevc_chroma_itrans_recon_4x4_ft ihevc_chroma_itrans_recon_4x4; ihevc_hbd_chroma_itrans_recon_4x4_ft ihevc_hbd_chroma_itrans_recon_4x4; @@ -101,6 +110,7 @@ ihevc_chroma_itrans_recon_8x8_ft ihevc_chroma_itrans_recon_8x8; ihevc_hbd_chroma_itrans_recon_8x8_ft ihevc_hbd_chroma_itrans_recon_8x8; ihevc_chroma_itrans_recon_16x16_ft ihevc_chroma_itrans_recon_16x16; ihevc_hbd_chroma_itrans_recon_16x16_ft ihevc_hbd_chroma_itrans_recon_16x16; +ihevc_chroma_itrans_recon_32x32_ft ihevc_chroma_itrans_recon_32x32; /* A9 Q Function Declarations */ ihevc_chroma_itrans_recon_4x4_ft ihevc_chroma_itrans_recon_4x4_a9q; diff --git a/common/ihevc_chroma_itrans_recon_32x32.c b/common/ihevc_chroma_itrans_recon_32x32.c new file mode 100644 index 0000000..ef28452 --- /dev/null +++ b/common/ihevc_chroma_itrans_recon_32x32.c @@ -0,0 +1,1135 @@ +/****************************************************************************** +* +* Copyright (C) 2025 Ittiam Systems Pvt Ltd, Bangalore +* +* Licensed under the Apache License, Version 2.0 
(the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at: +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +******************************************************************************/ +/** + ******************************************************************************* + * @file + * ihevc_chroma_itrans_recon_32x32.c + * + * @brief + * Contains function definitions for 32x32 inverse transform and reconstruction + * of chroma interleaved data. + * + * @author + * 100927 + * + * @par List of Functions: + * - ihevc_chroma_itrans_recon_32x32() + * + * @remarks + * None + * + ******************************************************************************* + */ + +#include +#include +#include "ihevc_typedefs.h" +#include "ihevc_macros.h" +#include "ihevc_platform_macros.h" +#include "ihevc_defs.h" +#include "ihevc_trans_tables.h" +#include "ihevc_chroma_itrans_recon.h" +#include "ihevc_func_selector.h" +#include "ihevc_trans_macros.h" + +/* All the functions work one component(U or V) of interleaved data depending upon pointers passed to it */ +/* Data visualization */ +/* U V U V U V U V */ +/* U V U V U V U V */ +/* U V U V U V U V */ +/* U V U V U V U V */ +/* If the pointer points to first byte of above stream (U) , functions will operate on U component */ +/* If the pointer points to second byte of above stream (V) , functions will operate on V component */ + + +/** + ******************************************************************************* + * + * @brief + * This function performs Inverse transform and reconstruction for 32x32 + * input block + * + * @par 
Description: + * Performs inverse transform and adds the prediction data and clips output + * to 8 bit + * + * @param[in] pi2_src + * Input 32x32 coefficients + * + * @param[in] pi2_tmp + * Temporary 32x32 buffer for storing inverse transform + * 1st stage output + * + * @param[in] pu1_pred + * Prediction 32x32 block + * + * @param[out] pu1_dst + * Output 32x32 block + * + * @param[in] src_strd + * Input stride + * + * @param[in] pred_strd + * Prediction stride + * + * @param[in] dst_strd + * Output Stride + * + * @param[in] shift + * Output shift + * + * @param[in] zero_cols + * Zero columns in pi2_src + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ + + +void ihevc_chroma_itrans_recon_32x32(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 j, k; + WORD32 e[16], o[16]; + WORD32 ee[8], eo[8]; + WORD32 eee[4], eeo[4]; + WORD32 eeee[2], eeeo[2]; + WORD32 add; + WORD32 shift; + WORD16 *pi2_tmp_orig; + WORD32 trans_size; + WORD32 row_limit_2nd_stage, zero_rows_2nd_stage = zero_cols; + + trans_size = TRANS_SIZE_32; + pi2_tmp_orig = pi2_tmp; + + if((zero_cols & 0xFFFFFFF0) == 0xFFFFFFF0) + row_limit_2nd_stage = 4; + else if((zero_cols & 0xFFFFFF00) == 0xFFFFFF00) + row_limit_2nd_stage = 8; + else + row_limit_2nd_stage = TRANS_SIZE_32; + + if((zero_rows & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; 
j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_32[3][k] + * pi2_src[3 * src_strd]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]; + } +// for(k = 0; k < 4; 
k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final 
spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_32[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_32[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_32[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_32[15][k] + * pi2_tmp[15 * trans_size] + + g_ai2_ihevc_trans_32[17][k] + * pi2_tmp[17 * trans_size] + + g_ai2_ihevc_trans_32[19][k] + * pi2_tmp[19 * trans_size] + + g_ai2_ihevc_trans_32[21][k] + * pi2_tmp[21 * trans_size] + + g_ai2_ihevc_trans_32[23][k] + * pi2_tmp[23 * trans_size] + + g_ai2_ihevc_trans_32[25][k] + * pi2_tmp[25 * trans_size] + + g_ai2_ihevc_trans_32[27][k] + * pi2_tmp[27 * trans_size] + + g_ai2_ihevc_trans_32[29][k] + * pi2_tmp[29 * trans_size] + + g_ai2_ihevc_trans_32[31][k] + * pi2_tmp[31 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * 
trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_32[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_32[14][k] + * pi2_tmp[14 * trans_size] + + g_ai2_ihevc_trans_32[18][k] + * pi2_tmp[18 * trans_size] + + g_ai2_ihevc_trans_32[22][k] + * pi2_tmp[22 * trans_size] + + g_ai2_ihevc_trans_32[26][k] + * pi2_tmp[26 * trans_size] + + g_ai2_ihevc_trans_32[30][k] + * pi2_tmp[30 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_32[12][k] + * pi2_tmp[12 * trans_size] + + g_ai2_ihevc_trans_32[20][k] + * pi2_tmp[20 * trans_size] + + g_ai2_ihevc_trans_32[28][k] + * pi2_tmp[28 * trans_size]; + } + eeeo[0] = + g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][0] + * pi2_tmp[24 + * trans_size]; + eeeo[1] = + g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][1] + * pi2_tmp[24 + * trans_size]; + eeee[0] = + g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_tmp[16 + * trans_size]; + eeee[1] = + g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_tmp[16 + * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + 
pu1_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + } + else if((zero_rows & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_32[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_32[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_32[7][k] + * pi2_src[7 * src_strd]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_32[6][k] + * pi2_src[6 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + 
ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + 
pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] 
= g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_32[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_32[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_32[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_32[15][k] + * pi2_tmp[15 * trans_size] + + g_ai2_ihevc_trans_32[17][k] + * pi2_tmp[17 * trans_size] + + g_ai2_ihevc_trans_32[19][k] + * pi2_tmp[19 * trans_size] + + g_ai2_ihevc_trans_32[21][k] + * pi2_tmp[21 * trans_size] + + g_ai2_ihevc_trans_32[23][k] + * pi2_tmp[23 * trans_size] + + g_ai2_ihevc_trans_32[25][k] + * pi2_tmp[25 * trans_size] + + g_ai2_ihevc_trans_32[27][k] + * pi2_tmp[27 * trans_size] + + g_ai2_ihevc_trans_32[29][k] + * pi2_tmp[29 * trans_size] + + g_ai2_ihevc_trans_32[31][k] + * pi2_tmp[31 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_32[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_32[14][k] + * pi2_tmp[14 * trans_size] + + g_ai2_ihevc_trans_32[18][k] + * pi2_tmp[18 * trans_size] + + g_ai2_ihevc_trans_32[22][k] + * pi2_tmp[22 * trans_size] + + g_ai2_ihevc_trans_32[26][k] + * pi2_tmp[26 * trans_size] + + g_ai2_ihevc_trans_32[30][k] + * pi2_tmp[30 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_32[12][k] + * pi2_tmp[12 * trans_size] + + g_ai2_ihevc_trans_32[20][k] + * pi2_tmp[20 * trans_size] + + g_ai2_ihevc_trans_32[28][k] + * pi2_tmp[28 * trans_size]; + } + eeeo[0] = + g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][0] + * pi2_tmp[24 + * trans_size]; + eeeo[1] = + g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][1] + * 
pi2_tmp[24 + * trans_size]; + eeee[0] = + g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_tmp[16 + * trans_size]; + eeee[1] = + g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_tmp[16 + * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + } + else /* All rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* 
Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_32[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_32[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_32[7][k] + * pi2_src[7 * src_strd] + + g_ai2_ihevc_trans_32[9][k] + * pi2_src[9 * src_strd] + + g_ai2_ihevc_trans_32[11][k] + * pi2_src[11 * src_strd] + + g_ai2_ihevc_trans_32[13][k] + * pi2_src[13 * src_strd] + + g_ai2_ihevc_trans_32[15][k] + * pi2_src[15 * src_strd] + + g_ai2_ihevc_trans_32[17][k] + * pi2_src[17 * src_strd] + + g_ai2_ihevc_trans_32[19][k] + * pi2_src[19 * src_strd] + + g_ai2_ihevc_trans_32[21][k] + * pi2_src[21 * src_strd] + + g_ai2_ihevc_trans_32[23][k] + * pi2_src[23 * src_strd] + + g_ai2_ihevc_trans_32[25][k] + * pi2_src[25 * src_strd] + + g_ai2_ihevc_trans_32[27][k] + * pi2_src[27 * src_strd] + + g_ai2_ihevc_trans_32[29][k] + * pi2_src[29 * src_strd] + + g_ai2_ihevc_trans_32[31][k] + * pi2_src[31 * src_strd]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_32[6][k] + * pi2_src[6 * src_strd] + + g_ai2_ihevc_trans_32[10][k] + * pi2_src[10 * src_strd] + + g_ai2_ihevc_trans_32[14][k] + * pi2_src[14 * src_strd] + + g_ai2_ihevc_trans_32[18][k] + * pi2_src[18 * src_strd] + + g_ai2_ihevc_trans_32[22][k] + * pi2_src[22 * src_strd] + + g_ai2_ihevc_trans_32[26][k] + * pi2_src[26 * src_strd] + + g_ai2_ihevc_trans_32[30][k] + * pi2_src[30 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd] + + g_ai2_ihevc_trans_32[12][k] + * pi2_src[12 * src_strd] + + g_ai2_ihevc_trans_32[20][k] + * pi2_src[20 * src_strd] + + g_ai2_ihevc_trans_32[28][k] + * pi2_src[28 * src_strd]; + } + eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd] + + g_ai2_ihevc_trans_32[24][0] + * pi2_src[24 * src_strd]; + eeeo[1] = g_ai2_ihevc_trans_32[8][1] 
* pi2_src[8 * src_strd] + + g_ai2_ihevc_trans_32[24][1] + * pi2_src[24 * src_strd]; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_src[16 * src_strd]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_src[16 * src_strd]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + 
eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + 
itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_32[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_32[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_32[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_32[15][k] + * pi2_tmp[15 * trans_size] + + g_ai2_ihevc_trans_32[17][k] + * pi2_tmp[17 * trans_size] + + g_ai2_ihevc_trans_32[19][k] + * pi2_tmp[19 * trans_size] + + g_ai2_ihevc_trans_32[21][k] + * pi2_tmp[21 * trans_size] + + g_ai2_ihevc_trans_32[23][k] + * pi2_tmp[23 * trans_size] + + g_ai2_ihevc_trans_32[25][k] + * pi2_tmp[25 * trans_size] + + g_ai2_ihevc_trans_32[27][k] + * pi2_tmp[27 * trans_size] + + g_ai2_ihevc_trans_32[29][k] + * pi2_tmp[29 * trans_size] + + g_ai2_ihevc_trans_32[31][k] + * pi2_tmp[31 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_32[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_32[14][k] + * pi2_tmp[14 * trans_size] + + g_ai2_ihevc_trans_32[18][k] + * pi2_tmp[18 * trans_size] + + g_ai2_ihevc_trans_32[22][k] + * pi2_tmp[22 * trans_size] + + g_ai2_ihevc_trans_32[26][k] + * pi2_tmp[26 * trans_size] + + 
g_ai2_ihevc_trans_32[30][k] + * pi2_tmp[30 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_32[12][k] + * pi2_tmp[12 * trans_size] + + g_ai2_ihevc_trans_32[20][k] + * pi2_tmp[20 * trans_size] + + g_ai2_ihevc_trans_32[28][k] + * pi2_tmp[28 * trans_size]; + } + eeeo[0] = + g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][0] + * pi2_tmp[24 + * trans_size]; + eeeo[1] = + g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][1] + * pi2_tmp[24 + * trans_size]; + eeee[0] = + g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_tmp[16 + * trans_size]; + eeee[1] = + g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_tmp[16 + * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); + itrans_out = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2])); + } + pi2_tmp++; + pu1_pred += pred_strd; + pu1_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + } +} + diff --git a/common/ihevc_chroma_recon.c 
b/common/ihevc_chroma_recon.c index 4a1e9ee..d78a476 100644 --- a/common/ihevc_chroma_recon.c +++ b/common/ihevc_chroma_recon.c @@ -306,3 +306,157 @@ void ihevc_chroma_recon_16x16(WORD16 *pi2_src, } } +/** + ******************************************************************************* + * + * @brief + * This function performs reconstruction for 32x32 input block + * + * @par Description: + * Performs reconstruction of 32x32 input block by adding prediction + * data to input and clipping it to 8 bit + * + * @param[in] pi2_src + * Input 32x32 block that is added to the prediction + * + * @param[in] pu1_pred + * Prediction 32x32 block + * + * @param[out] pu1_dst + * Output 32x32 block + * + * @param[in] src_strd + * Input stride + * + * @param[in] pred_strd + * Prediction stride + * + * @param[in] dst_strd + * Output Stride + * + * @param[in] zero_cols + * Bit mask read LSB first, one bit per column: when the bit is set the + * column is copied from pu1_pred without adding pi2_src + * + * @returns Void + * + * @remarks + * pu1_pred and pu1_dst advance by 2 bytes per column while pi2_src + * advances by one element, so only every other byte of a pred/dst row + * is touched (presumably interleaved chroma - confirm with callers) + * + ******************************************************************************* + */ + + +void ihevc_chroma_recon_32x32(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols) +{ + WORD32 i, j; + WORD32 trans_size; + + trans_size = TRANS_SIZE_32; + + /* Reconstruction */ + + for(i = 0; i < trans_size; i++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + for(j = 0; j < trans_size; j++) + { + pu1_dst[j * dst_strd] = pu1_pred[j * pred_strd]; + } + } + else + { + for(j = 0; j < trans_size; j++) + { + pu1_dst[j * dst_strd] = + CLIP_U8(pi2_src[j * src_strd] + pu1_pred[j * pred_strd]); + } + } + pi2_src++; + pu1_dst += 2; + pu1_pred += 2; + zero_cols = zero_cols >> 1; + } +} + +/** + ****************************************************************************** + * + * @brief Constructs chroma recon with Cross Component Prediction (CCP) + * + * @par Description + * This routine uses reconstructed luma residual samples to
predict chroma + * residual samples as per HEVC Specification Section 8.6.6. It scales the + * luma residual by a signaled alpha value and adds it to the chroma residual + * prior to final reconstruction. + * + * @param[in] pi2_luma_res + * pointer to the luma residual + * + * @param[in] pi2_chroma_res + * pointer to the chroma residual + * + * @param[in] pu1_pred + * prediction block, read at a 2 byte step within a row + * + * @param[out] pu1_dst + * destination block, written at a 2 byte step within a row + * + * @param[in] alpha + * scaling factor for the luma residual; (alpha * luma) >> 3 is added + * + * @param[in] trans_size + * transform size (number of rows and of samples per row processed) + * + * @param[in] luma_res_stride + * stride of the luma residual buffer + * + * @param[in] chroma_res_stride + * stride of the chroma residual buffer + * + * @param[in] pred_stride + * Prediction stride + * + * @param[in] dst_stride + * Output Stride + * + * @return None + * + ****************************************************************************** + */ +void ihevc_chroma_recon_nxn_ccp(WORD16 *pi2_luma_res, + WORD16 *pi2_chroma_res, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 alpha, + WORD32 trans_size, + WORD32 luma_res_stride, + WORD32 chroma_res_stride, + WORD32 pred_stride, + WORD32 dst_stride) +{ + WORD32 i, j; + + for(i = 0; i < trans_size; i++) + { + for(j = 0; j < trans_size; j++) + { + WORD32 res = (alpha * pi2_luma_res[j]) >> 3; + pu1_dst[j * 2] = CLIP_U8(pu1_pred[j * 2] + (pi2_chroma_res[j] + res)); + } + pi2_luma_res += luma_res_stride; + pi2_chroma_res += chroma_res_stride; + pu1_dst += dst_stride; + pu1_pred += pred_stride; + } +} diff --git a/common/ihevc_chroma_recon.h b/common/ihevc_chroma_recon.h index b4ece06..5363d3c 100644 --- a/common/ihevc_chroma_recon.h +++ b/common/ihevc_chroma_recon.h @@ -88,6 +88,13 @@ typedef void ihevc_hbd_chroma_recon_16x16_ft(WORD16 *pi2_src, WORD32 dst_strd, WORD32 zero_cols, UWORD8 bit_depth); +typedef void ihevc_chroma_recon_32x32_ft(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32
dst_strd, + WORD32 zero_cols); ihevc_chroma_recon_4x4_ft ihevc_chroma_recon_4x4; ihevc_hbd_chroma_recon_4x4_ft ihevc_hbd_chroma_recon_4x4; @@ -95,5 +102,17 @@ ihevc_chroma_recon_8x8_ft ihevc_chroma_recon_8x8; ihevc_hbd_chroma_recon_8x8_ft ihevc_hbd_chroma_recon_8x8; ihevc_chroma_recon_16x16_ft ihevc_chroma_recon_16x16; ihevc_hbd_chroma_recon_16x16_ft ihevc_hbd_chroma_recon_16x16; +ihevc_chroma_recon_32x32_ft ihevc_chroma_recon_32x32; + +void ihevc_chroma_recon_nxn_ccp(WORD16 *pi2_luma_res, + WORD16 *pi2_chroma_res, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 alpha, + WORD32 trans_size, + WORD32 luma_res_stride, + WORD32 chroma_res_stride, + WORD32 pred_stride, + WORD32 dst_stride); #endif /*_IHEVC_CHROMA_RECON_H_*/ diff --git a/common/ihevc_defs.h b/common/ihevc_defs.h index faa3704..d0b11b7 100644 --- a/common/ihevc_defs.h +++ b/common/ihevc_defs.h @@ -40,7 +40,10 @@ /*****************************************************************************/ enum { + IHEVC_PROFILE_UNKNOWN = -1, IHEVC_PROFILE_MAIN = 0, + IHEVC_PROFILE_MAIN_STILL = 1, + IHEVC_PROFILE_MAIN_REXT = 2, }; enum diff --git a/common/ihevc_inter_pred.h b/common/ihevc_inter_pred.h index e84e912..d75d3f3 100644 --- a/common/ihevc_inter_pred.h +++ b/common/ihevc_inter_pred.h @@ -46,6 +46,9 @@ #define REF_WIDTH 1280 #define REF_HEIGHT 720 +extern WORD8 gai1_ihevc_luma_filter[4][NTAPS_LUMA]; +extern WORD8 gai1_ihevc_chroma_filter[8][NTAPS_LUMA]; + /*****************************************************************************/ /* Function Declarations */ /*****************************************************************************/ diff --git a/common/ihevc_inter_pred_filters.c b/common/ihevc_inter_pred_filters.c index 717bb53..75d0ed9 100644 --- a/common/ihevc_inter_pred_filters.c +++ b/common/ihevc_inter_pred_filters.c @@ -61,6 +61,26 @@ #include "ihevc_func_selector.h" #include "ihevc_inter_pred.h" + +WORD8 gai1_ihevc_luma_filter[4][NTAPS_LUMA] = +{ + { 0, 0, 0, 64, 0, 0, 0, 0 }, + { -1, 4, -10, 58, 
17, -5, 1, 0 }, + { -1, 4, -11, 40, 40, -11, 4, -1 }, + { 0, 1, -5, 17, 58, -10, 4, -1 } }; + +/* The filter uses only the first four elements in each array */ +WORD8 gai1_ihevc_chroma_filter[8][NTAPS_LUMA] = +{ + { 0, 64, 0, 0, 0, 0, 0, 0 }, + { -2, 58, 10, -2, 0, 0, 0, 0 }, + { -4, 54, 16, -2, 0, 0, 0, 0 }, + { -6, 46, 28, -4, 0, 0, 0, 0 }, + { -4, 36, 36, -4, 0, 0, 0, 0 }, + { -4, 28, 46, -6, 0, 0, 0, 0 }, + { -2, 16, 54, -4, 0, 0, 0, 0 }, + { -2, 10, 58, -2, 0, 0, 0, 0 } }; + /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ diff --git a/common/ihevc_intra_pred.h b/common/ihevc_intra_pred.h index c2902ea..6cd285b 100644 --- a/common/ihevc_intra_pred.h +++ b/common/ihevc_intra_pred.h @@ -159,7 +159,7 @@ typedef void ihevc_intra_pred_ref_filtering_ft(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst, WORD32 mode, - WORD32 strong_intra_smoothing_enable_flag); + WORD32 intra_smoothing_flags); typedef void ihevc_hbd_intra_pred_luma_planar_ft( UWORD16 *pu2_ref, diff --git a/common/ihevc_intra_pred_filters.c b/common/ihevc_intra_pred_filters.c index 41e4a1f..1610385 100644 --- a/common/ihevc_intra_pred_filters.c +++ b/common/ihevc_intra_pred_filters.c @@ -627,6 +627,11 @@ void ihevc_intra_pred_luma_ref_substitution(UWORD8 *pu1_top_left, * @param[in] mode * integer intraprediction mode * +* @param[in] intra_smoothing_flags +* integer bit 3, when set, disables intra smoothing of the reference +* samples unconditionally.
This is applicable to frext profiles only. +* bit 0, when set, enables strong intra smoothing +* * @returns * * @remarks @@ -640,7 +645,7 @@ void ihevc_intra_pred_ref_filtering(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst, WORD32 mode, - WORD32 strong_intra_smoothing_enable_flag) + WORD32 intra_smoothing_flags) { WORD32 filter_flag; WORD32 i; /* Generic indexing variable */ @@ -651,9 +656,11 @@ void ihevc_intra_pred_ref_filtering(UWORD8 *pu1_src, WORD32 abs_cond_top_flag = 0; /*WORD32 dc_val = 1 << (BIT_DEPTH - 5);*/ WORD32 dc_val = 1 << (8 - 5); - //WORD32 strong_intra_smoothing_enable_flag = 1; + WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3); + WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1; - filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)); + filter_flag = intra_smoothing_disabled ? + 0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2))); if(0 == filter_flag) { if(pu1_src == pu1_dst) @@ -943,8 +950,8 @@ void ihevc_intra_pred_luma_dc(UWORD8 *pu1_ref, * @param[in] nt * integer Transform Block size * -* @param[in] mode -* integer intraprediction mode +* @param[in] disable_boundary_filter +* disable boundary filtering * * @returns * @@ -960,17 +967,16 @@ void ihevc_intra_pred_luma_horz(UWORD8 *pu1_ref, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, - WORD32 mode) + WORD32 disable_boundary_filter) { WORD32 row, col; WORD32 two_nt; WORD16 s2_predpixel; - UNUSED(mode); UNUSED(src_strd); two_nt = 2 * nt; - if(nt == 32) + if(nt == 32 || disable_boundary_filter) { for(row = 0; row < nt; row++) for(col = 0; col < nt; col++) @@ -1023,8 +1029,8 @@ void ihevc_intra_pred_luma_horz(UWORD8 *pu1_ref, * @param[in] nt * integer Transform Block size * -* @param[in] mode -* integer intraprediction mode +* @param[in] disable_boundary_filter +* disable boundary filtering * * @returns * @@ -1040,15 +1046,14 @@ void ihevc_intra_pred_luma_ver(UWORD8 *pu1_ref, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, - WORD32
mode) + WORD32 disable_boundary_filter) { WORD32 row, col; WORD16 s2_predpixel; WORD32 two_nt = 2 * nt; - UNUSED(mode); UNUSED(src_strd); - if(nt == 32) + if(nt == 32 || disable_boundary_filter) { /* Replication to next columns*/ for(row = 0; row < nt; row++) diff --git a/common/ihevc_itrans_res.c b/common/ihevc_itrans_res.c new file mode 100644 index 0000000..c10450a --- /dev/null +++ b/common/ihevc_itrans_res.c @@ -0,0 +1,2493 @@ +/****************************************************************************** +* +* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at: +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +******************************************************************************/ +/** + ******************************************************************************* + * @file + * ihevc_itrans_res.c + * + * @brief + * Contains function definitions for inverse transform + * + * @author + * 100470 + * + * @par List of Functions: + * - ihevc_itrans_res_4x4_ttype1() + * - ihevc_itrans_res_4x4() + * - ihevcd_itrans_res_dc() + * - ihevc_itrans_res_8x8() + * - ihevc_itrans_res_16x16() + * - ihevc_itrans_res_32x32() + * - ihevc_res_4x4_rotate() + * - ihevc_res_nxn_copy() + * - ihevc_res_nxn_rdpcm_horz() + * - ihevc_res_nxn_rdpcm_vert() + * + * @remarks + * None + * + ******************************************************************************* + */ + +#include +#include + +#include "ihevc_typedefs.h" +#include "ihevc_macros.h" +#include "ihevc_platform_macros.h" +#include "ihevc_defs.h" +#include "ihevc_trans_tables.h" +#include "ihevc_func_selector.h" +#include "ihevc_trans_macros.h" +#include "ihevc_itrans_res.h" + + +void ihevc_itrans_res_4x4_ttype1(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 i, c[4]; + WORD32 add; + WORD32 shift; + WORD16 *pi2_tmp_orig; + WORD32 trans_size; + UNUSED(zero_rows); + trans_size = TRANS_SIZE_4; + + pi2_tmp_orig = pi2_tmp; + + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(i = 0; i < trans_size; i++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + // Intermediate Variables + c[0] = pi2_src[0] + pi2_src[2 * src_strd]; + c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd]; + c[2] = pi2_src[0] - pi2_src[3 * src_strd]; + c[3] = 74 * pi2_src[src_strd]; + + pi2_tmp[0] = + CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); + pi2_tmp[1] = + CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); + 
pi2_tmp[2] = + CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> shift); + pi2_tmp[3] = + CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + + for(i = 0; i < trans_size; i++) + { + WORD32 itrans_out; + // Intermediate Variables + c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size]; + c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]; + c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size]; + c[3] = 74 * pi2_tmp[trans_size]; + + pi2_dst[0] = + CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); + + pi2_dst[1] = + CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); + + pi2_dst[2] = + CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift); + + pi2_dst[3] = + CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); + + pi2_tmp++; + pi2_dst += dst_strd; + } +} + + +void ihevc_itrans_res_4x4(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) + +{ + WORD32 j; + WORD32 e[2], o[2]; + WORD32 add; + WORD32 shift; + WORD16 *pi2_tmp_orig; + WORD32 trans_size; + UNUSED(zero_rows); + trans_size = TRANS_SIZE_4; + + pi2_tmp_orig = pi2_tmp; + + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < trans_size; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd] + + g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd]; + o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd] + + g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd]; + e[0] = 
g_ai2_ihevc_trans_4[0][0] * pi2_src[0] + + g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd]; + e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0] + + g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd]; + + pi2_tmp[0] = + CLIP_S16(((e[0] + o[0] + add) >> shift)); + pi2_tmp[1] = + CLIP_S16(((e[1] + o[1] + add) >> shift)); + pi2_tmp[2] = + CLIP_S16(((e[1] - o[1] + add) >> shift)); + pi2_tmp[3] = + CLIP_S16(((e[0] - o[0] + add) >> shift)); + + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + + for(j = 0; j < trans_size; j++) + { + WORD32 itrans_out; + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_4[3][0] * pi2_tmp[3 * trans_size]; + o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_4[3][1] * pi2_tmp[3 * trans_size]; + e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_4[2][0] * pi2_tmp[2 * trans_size]; + e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_4[2][1] * pi2_tmp[2 * trans_size]; + + pi2_dst[0] = + CLIP_S16(((e[0] + o[0] + add) >> shift)); + + pi2_dst[1] = + CLIP_S16(((e[1] + o[1] + add) >> shift)); + + pi2_dst[2] = + CLIP_S16(((e[1] - o[1] + add) >> shift)); + + pi2_dst[3] = + CLIP_S16(((e[0] - o[0] + add) >> shift)); + + pi2_tmp++; + pi2_dst += dst_strd; + + } +} + + +void ihevcd_itrans_res_dc(WORD16 *pi2_dst, + WORD32 dst_strd, + WORD32 log2_trans_size, + WORD16 i2_coeff_value) +{ + WORD32 row, col; + WORD32 add, shift; + WORD32 dc_value, quant_out; + WORD32 trans_size; + + trans_size = (1 << log2_trans_size); + + quant_out = i2_coeff_value; + + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + dc_value = CLIP_S16((quant_out * 64 + add) >> shift); + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + dc_value = 
CLIP_S16((dc_value * 64 + add) >> shift); + + for(row = 0; row < trans_size; row++) + for(col = 0; col < trans_size; col++) + pi2_dst[row * dst_strd + col] = dc_value; + +} + + +void ihevc_itrans_res_8x8(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 j, k; + WORD32 e[4], o[4]; + WORD32 ee[2], eo[2]; + WORD32 add; + WORD32 shift; + WORD16 *pi2_tmp_orig; + WORD32 trans_size; + WORD32 zero_rows_2nd_stage = zero_cols; + WORD32 row_limit_2nd_stage; + + trans_size = TRANS_SIZE_8; + + pi2_tmp_orig = pi2_tmp; + + if((zero_cols & 0xF0) == 0xF0) + row_limit_2nd_stage = 4; + else + row_limit_2nd_stage = TRANS_SIZE_8; + + + if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_8[3][k] + * pi2_src[3 * src_strd]; + } + eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]; + eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]; + ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]; + ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + e[0] = ee[0] + eo[0]; + e[3] = ee[0] - eo[0]; + e[1] = ee[1] + 
eo[1]; + e[2] = ee[1] - eo[1]; + for(k = 0; k < 4; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 4] = + CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size]; + } + eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; + eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; + ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; + ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + e[0] = ee[0] + eo[0]; + e[3] = ee[0] - eo[0]; + e[1] = ee[1] + eo[1]; + e[2] = ee[1] - eo[1]; + for(k = 0; k < 4; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 4] = + CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_8[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_8[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_8[7][k] + * pi2_tmp[7 * trans_size]; + } + + eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; + eo[1] = 
g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; + ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; + ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + e[0] = ee[0] + eo[0]; + e[3] = ee[0] - eo[0]; + e[1] = ee[1] + eo[1]; + e[2] = ee[1] - eo[1]; + for(k = 0; k < 4; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 4] = + CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + } + else /* All rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_8[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_8[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_8[7][k] + * pi2_src[7 * 
src_strd]; + } + + eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd]; + eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd]; + ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0] + + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd]; + ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0] + + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + e[0] = ee[0] + eo[0]; + e[3] = ee[0] - eo[0]; + e[1] = ee[1] + eo[1]; + e[2] = ee[1] - eo[1]; + for(k = 0; k < 4; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 4] = + CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size]; + } + eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; + eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; + ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; + ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + e[0] = ee[0] + eo[0]; + e[3] = ee[0] - eo[0]; + e[1] = ee[1] + eo[1]; + e[2] = ee[1] - eo[1]; + for(k = 0; k < 4; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 4] = + CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); + } + pi2_tmp++; 
+ pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_8[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_8[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_8[7][k] + * pi2_tmp[7 * trans_size]; + } + + eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; + eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; + ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; + ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + e[0] = ee[0] + eo[0]; + e[3] = ee[0] - eo[0]; + e[1] = ee[1] + eo[1]; + e[2] = ee[1] - eo[1]; + for(k = 0; k < 4; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 4] = + CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + } +} + + +void ihevc_itrans_res_16x16(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 j, k; + WORD32 e[8], o[8]; + WORD32 ee[4], eo[4]; + WORD32 eee[2], eeo[2]; + WORD32 add; + WORD32 shift; + WORD16 *pi2_tmp_orig; + WORD32 trans_size; + WORD32 
zero_rows_2nd_stage = zero_cols; + WORD32 row_limit_2nd_stage; + + if((zero_cols & 0xFFF0) == 0xFFF0) + row_limit_2nd_stage = 4; + else if((zero_cols & 0xFF00) == 0xFF00) + row_limit_2nd_stage = 8; + else + row_limit_2nd_stage = TRANS_SIZE_16; + + trans_size = TRANS_SIZE_16; + pi2_tmp_orig = pi2_tmp; + if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ + { + /* Inverse Transform 1st stage */ + /************************************************************************************************/ + /**********************************START - IT_RECON_16x16****************************************/ + /************************************************************************************************/ + + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_16[3][k] + * pi2_src[3 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; + } + eeo[0] = 0; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; + eeo[1] = 0; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* 
Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + + if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; + } + eeo[0] = 0; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; + eeo[1] = 0; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_16[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_16[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_16[6][k] + * pi2_tmp[6 * trans_size]; + } + eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; + 
eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_16[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_16[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_16[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_16[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_16[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_16[15][k] + * pi2_tmp[15 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_16[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_16[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_16[14][k] + * pi2_tmp[14 * trans_size]; + } + eeo[0] = + g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_16[12][0] + * pi2_tmp[12 + * trans_size]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; + eeo[1] = + g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_16[12][1] + * pi2_tmp[12 + * trans_size]; + eee[1] = 
g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_16x16****************************************/ + /************************************************************************************************/ + } + else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ + { + /* Inverse Transform 1st stage */ + /************************************************************************************************/ + /**********************************START - IT_RECON_16x16****************************************/ + /************************************************************************************************/ + + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_16[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_16[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_16[7][k] + * pi2_src[7 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * 
pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_16[6][k] + * pi2_src[6 * src_strd]; + } + eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; + eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + + if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; + } + eeo[0] = 0; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; + eeo[1] = 0; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + 
CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_16[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_16[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_16[6][k] + * pi2_tmp[6 * trans_size]; + } + eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; + eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_16[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_16[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_16[9][k] + * 
pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_16[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_16[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_16[15][k] + * pi2_tmp[15 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_16[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_16[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_16[14][k] + * pi2_tmp[14 * trans_size]; + } + eeo[0] = + g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_16[12][0] + * pi2_tmp[12 + * trans_size]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; + eeo[1] = + g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_16[12][1] + * pi2_tmp[12 + * trans_size]; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_16x16****************************************/ + /************************************************************************************************/ + } + else /* All rows of input are non-zero */ + { + /* Inverse Transform 1st stage */ + /************************************************************************************************/ + 
/**********************************START - IT_RECON_16x16****************************************/ + /************************************************************************************************/ + + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_16[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_16[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_16[7][k] + * pi2_src[7 * src_strd] + + g_ai2_ihevc_trans_16[9][k] + * pi2_src[9 * src_strd] + + g_ai2_ihevc_trans_16[11][k] + * pi2_src[11 * src_strd] + + g_ai2_ihevc_trans_16[13][k] + * pi2_src[13 * src_strd] + + g_ai2_ihevc_trans_16[15][k] + * pi2_src[15 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_16[6][k] + * pi2_src[6 * src_strd] + + g_ai2_ihevc_trans_16[10][k] + * pi2_src[10 * src_strd] + + g_ai2_ihevc_trans_16[14][k] + * pi2_src[14 * src_strd]; + } + eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] + + g_ai2_ihevc_trans_16[12][0] + * pi2_src[12 * src_strd]; + eee[0] = + g_ai2_ihevc_trans_16[0][0] * pi2_src[0] + + g_ai2_ihevc_trans_16[8][0] + * pi2_src[8 + * src_strd]; + eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] + + g_ai2_ihevc_trans_16[12][1] + * pi2_src[12 * src_strd]; + eee[1] = + g_ai2_ihevc_trans_16[0][1] * pi2_src[0] + + g_ai2_ihevc_trans_16[8][1] + * pi2_src[8 + * src_strd]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + 
e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } +/* Stage 1 advanced pi2_tmp by trans_size per iteration; reset it to pi2_tmp_orig before stage 2 reads from it */ + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + + if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; + } + eeo[0] = 0; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; + eeo[1] = 0; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_16[5][k] + * pi2_tmp[5 * trans_size] + +
g_ai2_ihevc_trans_16[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_16[6][k] + * pi2_tmp[6 * trans_size]; + } + eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; + eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 8; k++) + { + o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_16[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_16[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_16[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_16[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_16[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_16[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_16[15][k] + * pi2_tmp[15 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_16[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_16[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_16[14][k] + * pi2_tmp[14 * trans_size]; + } + eeo[0] = + g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * 
trans_size] + + g_ai2_ihevc_trans_16[12][0] + * pi2_tmp[12 + * trans_size]; + eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; + eeo[1] = + g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_16[12][1] + * pi2_tmp[12 + * trans_size]; + eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + for(k = 0; k < 2; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 2] = eee[1 - k] - eeo[1 - k]; + } + for(k = 0; k < 4; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 4] = ee[3 - k] - eo[3 - k]; + } + for(k = 0; k < 8; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 8] = + CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_16x16****************************************/ + /************************************************************************************************/ + } + +} + + +void ihevc_itrans_res_32x32(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 j, k; + WORD32 e[16], o[16]; + WORD32 ee[8], eo[8]; + WORD32 eee[4], eeo[4]; + WORD32 eeee[2], eeeo[2]; + WORD32 add; + WORD32 shift; + WORD16 *pi2_tmp_orig; + WORD32 trans_size; + WORD32 zero_rows_2nd_stage = zero_cols; + WORD32 row_limit_2nd_stage; + + trans_size = TRANS_SIZE_32; + pi2_tmp_orig = pi2_tmp; + + if((zero_cols & 0xFFFFFFF0) == 0xFFFFFFF0) + row_limit_2nd_stage = 4; + else if((zero_cols & 0xFFFFFF00) == 0xFFFFFF00) + row_limit_2nd_stage = 8; + else + row_limit_2nd_stage = TRANS_SIZE_32; + + if((zero_rows & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of input 
are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_32[3][k] + * pi2_src[3 * src_strd]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 
rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size]; + } + for(k = 0; k < 4; k++) + { + 
eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_32[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_32[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_32[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_32[15][k] + * pi2_tmp[15 * trans_size] + + g_ai2_ihevc_trans_32[17][k] + * pi2_tmp[17 * trans_size] + + g_ai2_ihevc_trans_32[19][k] + * pi2_tmp[19 * trans_size] + + g_ai2_ihevc_trans_32[21][k] + * pi2_tmp[21 * trans_size] + + g_ai2_ihevc_trans_32[23][k] + * pi2_tmp[23 * trans_size] + + g_ai2_ihevc_trans_32[25][k] + * pi2_tmp[25 * trans_size] + + g_ai2_ihevc_trans_32[27][k] + * pi2_tmp[27 * trans_size] + + g_ai2_ihevc_trans_32[29][k] + * pi2_tmp[29 * trans_size] + + g_ai2_ihevc_trans_32[31][k] + * 
pi2_tmp[31 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_32[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_32[14][k] + * pi2_tmp[14 * trans_size] + + g_ai2_ihevc_trans_32[18][k] + * pi2_tmp[18 * trans_size] + + g_ai2_ihevc_trans_32[22][k] + * pi2_tmp[22 * trans_size] + + g_ai2_ihevc_trans_32[26][k] + * pi2_tmp[26 * trans_size] + + g_ai2_ihevc_trans_32[30][k] + * pi2_tmp[30 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_32[12][k] + * pi2_tmp[12 * trans_size] + + g_ai2_ihevc_trans_32[20][k] + * pi2_tmp[20 * trans_size] + + g_ai2_ihevc_trans_32[28][k] + * pi2_tmp[28 * trans_size]; + } + eeeo[0] = + g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][0] + * pi2_tmp[24 + * trans_size]; + eeeo[1] = + g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][1] + * pi2_tmp[24 + * trans_size]; + eeee[0] = + g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_tmp[16 + * trans_size]; + eeee[1] = + g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_tmp[16 + * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + 
/************************************************************************************************/ + /************************************END - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + } + else if((zero_rows & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_32[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_32[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_32[7][k] + * pi2_src[7 * src_strd]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_32[6][k] + * pi2_src[6 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 
4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { 
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_32[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_32[11][k] + * pi2_tmp[11 * trans_size] + + g_ai2_ihevc_trans_32[13][k] + * 
pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_32[15][k] + * pi2_tmp[15 * trans_size] + + g_ai2_ihevc_trans_32[17][k] + * pi2_tmp[17 * trans_size] + + g_ai2_ihevc_trans_32[19][k] + * pi2_tmp[19 * trans_size] + + g_ai2_ihevc_trans_32[21][k] + * pi2_tmp[21 * trans_size] + + g_ai2_ihevc_trans_32[23][k] + * pi2_tmp[23 * trans_size] + + g_ai2_ihevc_trans_32[25][k] + * pi2_tmp[25 * trans_size] + + g_ai2_ihevc_trans_32[27][k] + * pi2_tmp[27 * trans_size] + + g_ai2_ihevc_trans_32[29][k] + * pi2_tmp[29 * trans_size] + + g_ai2_ihevc_trans_32[31][k] + * pi2_tmp[31 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_32[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_32[14][k] + * pi2_tmp[14 * trans_size] + + g_ai2_ihevc_trans_32[18][k] + * pi2_tmp[18 * trans_size] + + g_ai2_ihevc_trans_32[22][k] + * pi2_tmp[22 * trans_size] + + g_ai2_ihevc_trans_32[26][k] + * pi2_tmp[26 * trans_size] + + g_ai2_ihevc_trans_32[30][k] + * pi2_tmp[30 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_32[12][k] + * pi2_tmp[12 * trans_size] + + g_ai2_ihevc_trans_32[20][k] + * pi2_tmp[20 * trans_size] + + g_ai2_ihevc_trans_32[28][k] + * pi2_tmp[28 * trans_size]; + } + eeeo[0] = + g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][0] + * pi2_tmp[24 + * trans_size]; + eeeo[1] = + g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][1] + * pi2_tmp[24 + * trans_size]; + eeee[0] = + g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_tmp[16 + * trans_size]; + eeee[1] = + g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_tmp[16 + * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = 
eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + } + else /* All rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + /* Inverse Transform 1st stage */ + shift = IT_SHIFT_STAGE_1; + add = 1 << (shift - 1); + + for(j = 0; j < row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] + + g_ai2_ihevc_trans_32[3][k] + * pi2_src[3 * src_strd] + + g_ai2_ihevc_trans_32[5][k] + * pi2_src[5 * src_strd] + + g_ai2_ihevc_trans_32[7][k] + * pi2_src[7 * src_strd] + + g_ai2_ihevc_trans_32[9][k] + * pi2_src[9 * src_strd] + + g_ai2_ihevc_trans_32[11][k] + * pi2_src[11 * src_strd] + + g_ai2_ihevc_trans_32[13][k] + * pi2_src[13 * src_strd] + + g_ai2_ihevc_trans_32[15][k] + 
* pi2_src[15 * src_strd] + + g_ai2_ihevc_trans_32[17][k] + * pi2_src[17 * src_strd] + + g_ai2_ihevc_trans_32[19][k] + * pi2_src[19 * src_strd] + + g_ai2_ihevc_trans_32[21][k] + * pi2_src[21 * src_strd] + + g_ai2_ihevc_trans_32[23][k] + * pi2_src[23 * src_strd] + + g_ai2_ihevc_trans_32[25][k] + * pi2_src[25 * src_strd] + + g_ai2_ihevc_trans_32[27][k] + * pi2_src[27 * src_strd] + + g_ai2_ihevc_trans_32[29][k] + * pi2_src[29 * src_strd] + + g_ai2_ihevc_trans_32[31][k] + * pi2_src[31 * src_strd]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd] + + g_ai2_ihevc_trans_32[6][k] + * pi2_src[6 * src_strd] + + g_ai2_ihevc_trans_32[10][k] + * pi2_src[10 * src_strd] + + g_ai2_ihevc_trans_32[14][k] + * pi2_src[14 * src_strd] + + g_ai2_ihevc_trans_32[18][k] + * pi2_src[18 * src_strd] + + g_ai2_ihevc_trans_32[22][k] + * pi2_src[22 * src_strd] + + g_ai2_ihevc_trans_32[26][k] + * pi2_src[26 * src_strd] + + g_ai2_ihevc_trans_32[30][k] + * pi2_src[30 * src_strd]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd] + + g_ai2_ihevc_trans_32[12][k] + * pi2_src[12 * src_strd] + + g_ai2_ihevc_trans_32[20][k] + * pi2_src[20 * src_strd] + + g_ai2_ihevc_trans_32[28][k] + * pi2_src[28 * src_strd]; + } + eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd] + + g_ai2_ihevc_trans_32[24][0] + * pi2_src[24 * src_strd]; + eeeo[1] = g_ai2_ihevc_trans_32[8][1] * pi2_src[8 * src_strd] + + g_ai2_ihevc_trans_32[24][1] + * pi2_src[24 * src_strd]; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_src[16 * src_strd]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_src[16 * src_strd]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) 
+ { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_tmp[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + pi2_tmp[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + } + pi2_src++; + pi2_tmp += trans_size; + zero_cols = zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + shift = IT_SHIFT_STAGE_2; + add = 1 << (shift - 1); + if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]; + } +// for(k = 0; k < 4; k++) + { + eeo[0] = 0; + eeo[1] = 0; + eeo[2] = 0; + eeo[3] = 0; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */ + 
{ + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]; + } + eeeo[0] = 0; + eeeo[1] = 0; + eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]; + eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 16; k++) + { + o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size] + + g_ai2_ihevc_trans_32[3][k] + * pi2_tmp[3 * trans_size] + + g_ai2_ihevc_trans_32[5][k] + * pi2_tmp[5 * trans_size] + + g_ai2_ihevc_trans_32[7][k] + * pi2_tmp[7 * trans_size] + + g_ai2_ihevc_trans_32[9][k] + * pi2_tmp[9 * trans_size] + + g_ai2_ihevc_trans_32[11][k] + * pi2_tmp[11 * 
trans_size] + + g_ai2_ihevc_trans_32[13][k] + * pi2_tmp[13 * trans_size] + + g_ai2_ihevc_trans_32[15][k] + * pi2_tmp[15 * trans_size] + + g_ai2_ihevc_trans_32[17][k] + * pi2_tmp[17 * trans_size] + + g_ai2_ihevc_trans_32[19][k] + * pi2_tmp[19 * trans_size] + + g_ai2_ihevc_trans_32[21][k] + * pi2_tmp[21 * trans_size] + + g_ai2_ihevc_trans_32[23][k] + * pi2_tmp[23 * trans_size] + + g_ai2_ihevc_trans_32[25][k] + * pi2_tmp[25 * trans_size] + + g_ai2_ihevc_trans_32[27][k] + * pi2_tmp[27 * trans_size] + + g_ai2_ihevc_trans_32[29][k] + * pi2_tmp[29 * trans_size] + + g_ai2_ihevc_trans_32[31][k] + * pi2_tmp[31 * trans_size]; + } + for(k = 0; k < 8; k++) + { + eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size] + + g_ai2_ihevc_trans_32[6][k] + * pi2_tmp[6 * trans_size] + + g_ai2_ihevc_trans_32[10][k] + * pi2_tmp[10 * trans_size] + + g_ai2_ihevc_trans_32[14][k] + * pi2_tmp[14 * trans_size] + + g_ai2_ihevc_trans_32[18][k] + * pi2_tmp[18 * trans_size] + + g_ai2_ihevc_trans_32[22][k] + * pi2_tmp[22 * trans_size] + + g_ai2_ihevc_trans_32[26][k] + * pi2_tmp[26 * trans_size] + + g_ai2_ihevc_trans_32[30][k] + * pi2_tmp[30 * trans_size]; + } + for(k = 0; k < 4; k++) + { + eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size] + + g_ai2_ihevc_trans_32[12][k] + * pi2_tmp[12 * trans_size] + + g_ai2_ihevc_trans_32[20][k] + * pi2_tmp[20 * trans_size] + + g_ai2_ihevc_trans_32[28][k] + * pi2_tmp[28 * trans_size]; + } + eeeo[0] = + g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][0] + * pi2_tmp[24 + * trans_size]; + eeeo[1] = + g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size] + + g_ai2_ihevc_trans_32[24][1] + * pi2_tmp[24 + * trans_size]; + eeee[0] = + g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][0] + * pi2_tmp[16 + * trans_size]; + eeee[1] = + g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0] + + g_ai2_ihevc_trans_32[16][1] + * pi2_tmp[16 + * trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate 
the final spatial domain vector */ + eee[0] = eeee[0] + eeeo[0]; + eee[3] = eeee[0] - eeeo[0]; + eee[1] = eeee[1] + eeeo[1]; + eee[2] = eeee[1] - eeeo[1]; + for(k = 0; k < 4; k++) + { + ee[k] = eee[k] + eeo[k]; + ee[k + 4] = eee[3 - k] - eeo[3 - k]; + } + for(k = 0; k < 8; k++) + { + e[k] = ee[k] + eo[k]; + e[k + 8] = ee[7 - k] - eo[7 - k]; + } + for(k = 0; k < 16; k++) + { + pi2_dst[k] = + CLIP_S16(((e[k] + o[k] + add) >> shift)); + + pi2_dst[k + 16] = + CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift)); + } + pi2_tmp++; + pi2_dst += dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_32x32****************************************/ + /************************************************************************************************/ + } +} + +/* ihevc_res_4x4_rotate: fills a 4x4 block at pi2_dst where the element at destination row j, column i is read from pi2_src[offset - j * src_strd - i]; a destination column is zero-filled instead when its bit in the remapped zero_cols mask is 1. */ +void ihevc_res_4x4_rotate(WORD16 *pi2_src, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols) +{ + WORD32 i, j; + WORD32 trans_size; + + trans_size = TRANS_SIZE_4; +/* offset is the flat index of row (trans_size - 1), column (src_strd - 1) of the source */ + WORD32 offset = trans_size * src_strd - 1; +/* Remap the low 4 bits of the mask through gau4_ihevcd_4_bit_reverse (defined outside this view; presumably a 4-bit reversal table - confirm) */ + zero_cols = gau4_ihevcd_4_bit_reverse[zero_cols & 0xF]; +/* NOTE(review): offset - j * src_strd - i lands on source column (src_strd - 1 - i), which stays inside the 4x4 block only if src_strd == 4 - confirm with callers */ + for(i = 0; i < trans_size; i++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = 0; + } + } + else + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = pi2_src[offset - (j * src_strd + i)]; + } + } + pi2_dst++; + zero_cols = zero_cols >> 1; + } +} + +/* ihevc_res_nxn_copy: copies a trans_size x trans_size block of WORD16 values from pi2_src (row stride src_strd) to pi2_dst (row stride dst_strd), one column per outer iteration; column i is zero-filled without reading the source when bit i of zero_cols is 1. */ +void ihevc_res_nxn_copy(WORD16 *pi2_src, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 trans_size, + WORD32 zero_cols) +{ + + WORD32 i, j; + + for(i = 0; i < trans_size; i++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = 0; + } + } + else + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = pi2_src[(j * src_strd + i)]; + } + } + pi2_dst++; + zero_cols =
zero_cols >> 1; + } +} + +/* ihevc_res_nxn_rdpcm_horz: left-to-right running sum along each row. Destination column 0 is source column 0 (zeros if bit 0 of zero_cols is 1); every later column i is the previous destination column plus source column i, and the source term is skipped when bit i of zero_cols is 1. Sums are stored to WORD16 without clipping. */ +void ihevc_res_nxn_rdpcm_horz(WORD16 *pi2_src, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 trans_size, + WORD32 zero_cols) +{ + WORD32 i, j; +/* Column 0 has no left neighbour, so it is handled before the loop */ + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = 0; + } + } + else + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = pi2_src[j * src_strd]; + } + } + pi2_dst++; + zero_cols >>= 1; +/* Columns 1 .. trans_size - 1 build on the column just written, read back via pi2_dst[j * dst_strd - 1] */ + for(i = 1; i < trans_size; i++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = pi2_dst[j * dst_strd - 1]; + } + } + else + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = pi2_src[j * src_strd + i] + pi2_dst[j * dst_strd - 1]; + } + } + pi2_dst++; + zero_cols >>= 1; + } +} + +/* ihevc_res_nxn_rdpcm_vert: top-to-bottom running sum down each column. For column i, destination row j receives the sum of source rows 0..j of that column, accumulated in a WORD16 without clipping; a column whose bit in zero_cols is 1 is zero-filled instead. */ +void ihevc_res_nxn_rdpcm_vert(WORD16 *pi2_src, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 trans_size, + WORD32 zero_cols) +{ + WORD32 i, j; + + for(i = 0; i < trans_size; i++) + { + /* Checking for Zero Cols */ + if((zero_cols & 1) == 1) + { + for(j = 0; j < trans_size; j++) + { + pi2_dst[j * dst_strd] = 0; + } + } + else + { + WORD16 acc = pi2_src[i]; +/* Row 0 is the first source value of the column; each row below adds one more source row to acc */ + pi2_dst[0] = acc; + for(j = 1; j < trans_size; j++) + { + acc += pi2_src[j * src_strd + i]; + pi2_dst[j * dst_strd] = acc; + } + } + pi2_dst++; + zero_cols = zero_cols >> 1; + } +} + diff --git a/common/ihevc_itrans_res.h b/common/ihevc_itrans_res.h new file mode 100644 index 0000000..e14510c --- /dev/null +++ b/common/ihevc_itrans_res.h @@ -0,0 +1,108 @@ +/****************************************************************************** + * + * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ +/** + ******************************************************************************* + * @file + * ihevc_itrans_res.h + * + * @brief + * Functions declarations for inverse transform + * + * @author + * Ittiam + * + * @remarks + * None + * + ******************************************************************************* + */ +#ifndef _IHEVC_ITRANS_RES_H_ +#define _IHEVC_ITRANS_RES_H_ + +typedef void ihevc_itrans_res_4x4_ttype1_ft(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + +typedef void ihevc_itrans_res_4x4_ft(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + +typedef void ihevcd_itrans_res_dc_ft(WORD16 *pi2_dst, + WORD32 dst_strd, + WORD32 log2_trans_size, + WORD16 i2_coeff_value); + +typedef void ihevc_itrans_res_8x8_ft(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + +typedef void ihevc_itrans_res_16x16_ft(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + +typedef void ihevc_itrans_res_32x32_ft(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + +typedef void ihevc_res_4x4_transform(WORD16 *pi2_src, + WORD16 *pi2_dst, + WORD32 
src_strd, + WORD32 dst_strd, + WORD32 zero_cols); + +typedef void ihevc_res_nxn_transform(WORD16 *pi2_src, + WORD16 *pi2_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 trans_size, + WORD32 zero_cols); + +/* C function declarations */ +ihevc_itrans_res_4x4_ttype1_ft ihevc_itrans_res_4x4_ttype1; +ihevc_itrans_res_4x4_ft ihevc_itrans_res_4x4; +ihevcd_itrans_res_dc_ft ihevcd_itrans_res_dc; +ihevc_itrans_res_8x8_ft ihevc_itrans_res_8x8; +ihevc_itrans_res_16x16_ft ihevc_itrans_res_16x16; +ihevc_itrans_res_32x32_ft ihevc_itrans_res_32x32; + +ihevc_res_4x4_transform ihevc_res_4x4_rotate; +ihevc_res_nxn_transform ihevc_res_nxn_copy; +ihevc_res_nxn_transform ihevc_res_nxn_rdpcm_horz; +ihevc_res_nxn_transform ihevc_res_nxn_rdpcm_vert; + +#endif /*_IHEVC_ITRANS_RES_H_*/ diff --git a/common/ihevc_structs.h b/common/ihevc_structs.h index 55d746e..ddab43f 100644 --- a/common/ihevc_structs.h +++ b/common/ihevc_structs.h @@ -39,6 +39,7 @@ #define _IHEVC_STRUCTS_H_ +#ifndef DISABLE_SEI /** * Buffering Period SEI parameters Info */ @@ -519,6 +520,7 @@ typedef struct time_code_t s_time_code; } sei_params_t; +#endif /** * Sub-layer HRD parameters Info @@ -952,7 +954,9 @@ typedef struct // See IV_FLD_TYPE_T for all field types UWORD32 e4_fld_type; +#ifndef DISABLE_SEI sei_params_t s_sei_params; +#endif WORD32 i4_vui_present; @@ -1416,6 +1420,28 @@ typedef struct */ UWORD32 b3_chroma_intra_mode_idx : 3; +#ifdef ENABLE_MAIN_REXT_PROFILE + /** + * Cb CCP alpha magnitude + */ + UWORD32 b3_cb_log2_res_scale_abs_plus1 : 3; + + /** + * Cb CCP alpha sign + */ + UWORD32 b1_cb_log2_res_sign : 1; + + /** + * Cr CCP alpha magnitude + */ + UWORD32 b3_cr_log2_res_scale_abs_plus1 : 3; + + /** + * Cr CCP alpha sign + */ + UWORD32 b1_cr_log2_res_sign : 1; +#endif + }tu_t; /** @@ -2477,6 +2503,36 @@ typedef struct */ vui_t s_vui_parameters; + /** + * sps_extension_present_flag + */ + WORD8 i1_sps_extension_present_flag; + + /** + * sps_range_extension_present_flag + */ + WORD8 
i1_sps_range_extension_flag; + + /** + * sps_multilayer_extension_present_flag + */ + WORD8 i1_sps_multilayer_extension_flag; + + /** + * sps_3d_extension_present_flag + */ + WORD8 i1_sps_3d_extension_flag; + + /** + * sps_scc_extension_present_flag + */ + WORD8 i1_sps_scc_extension_flag; + + /** + * sps_extension_reserved + */ + WORD8 i1_sps_extension_4bits; + /** * Log2(CTB Size) in luma units */ @@ -2539,8 +2595,8 @@ typedef struct /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */ /* Intra 16x16 Y, 16x16 U, 16x16 V */ /* Inter 16x16 Y, 16x16 U, 16x16 V */ - /* Intra 32x32 Y */ - /* Inter 32x32 Y */ + /* Intra 32x32 Y, 32x32 U, 32x32 V */ + /* Inter 32x32 Y, 32x32 U, 32x32 V */ /*************************************************************************/ WORD16 *pi2_scaling_mat; @@ -2582,9 +2638,9 @@ typedef struct WORD8 i1_use_high_precision_pred_wt; /** - * fast_rice_adaptation_enabled_flag + * persistent_rice_adaptation_enabled_flag */ - WORD8 i1_fast_rice_adaptation_enabled_flag; + WORD8 i1_persistent_rice_adaptation_enabled_flag; /** * cabac_bypass_alignment_enabled_flag @@ -2615,8 +2671,8 @@ typedef struct /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */ /* Intra 16x16 Y, 16x16 U, 16x16 V */ /* Inter 16x16 Y, 16x16 U, 16x16 V */ - /* Intra 32x32 Y */ - /* Inter 32x32 Y */ + /* Intra 32x32 Y, 32x32 U, 32x32 V */ + /* Inter 32x32 Y, 32x32 U, 32x32 V */ /*************************************************************************/ WORD16 *pi2_scaling_mat; @@ -2818,6 +2874,36 @@ typedef struct */ WORD8 i1_slice_extension_present_flag; + /** + * pps_extension_present_flag + */ + WORD8 i1_pps_extension_present_flag; + + /** + * pps_range_extension_present_flag + */ + WORD8 i1_pps_range_extension_flag; + + /** + * pps_multilayer_extension_present_flag + */ + WORD8 i1_pps_multilayer_extension_flag; + + /** + * pps_3d_extension_present_flag + */ + WORD8 i1_pps_3d_extension_flag; + + /** + * pps_scc_extension_present_flag + */ + WORD8 i1_pps_scc_extension_flag; + + /** + * 
pps_extension_reserved + */ + WORD8 i1_pps_extension_4bits; + /** * scaling_list_dc_coef_minus8 */ @@ -2838,7 +2924,7 @@ typedef struct /** * log2_max_transform_skip_block_size_minus2 */ - WORD32 i4_log2_max_transform_skip_block_size_minus2; + WORD8 i1_log2_max_transform_skip_block_size_minus2; /** * cross_component_prediction_enabled_flag diff --git a/common/ihevc_trans_tables.c b/common/ihevc_trans_tables.c index 7c93662..3eb392e 100644 --- a/common/ihevc_trans_tables.c +++ b/common/ihevc_trans_tables.c @@ -877,6 +877,12 @@ const WORD16 g_ai2_ihevc_trans_intr_4[4][8] = const UWORD8 IHEVCE_CHROMA_SHUFFLEMASK_HBD[8] = { 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D }; + +const UWORD32 gau4_ihevcd_4_bit_reverse[] = +{ + 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 +}; + #ifndef DISABLE_AVX2 const WORD32 g_ai4_ihevc_trans_8_intr_avx2[7][8] = { /* 4*32 = 128 bit */ diff --git a/common/ihevc_trans_tables.h b/common/ihevc_trans_tables.h index 581cd92..6d07d41 100644 --- a/common/ihevc_trans_tables.h +++ b/common/ihevc_trans_tables.h @@ -116,4 +116,6 @@ extern const WORD16 g_ai2_ihevc_trans_intr_4[4][8]; extern const UWORD8 IHEVCE_CHROMA_SHUFFLEMASK_HBD[8]; +extern MEM_ALIGN16 const UWORD32 gau4_ihevcd_4_bit_reverse[16]; + #endif /*_IHEVC_TRANS_TABLES_H_*/ diff --git a/common/x86/ihevc_intra_pred_filters_sse42_intr.c b/common/x86/ihevc_intra_pred_filters_sse42_intr.c index 0a1ce0a..235a289 100644 --- a/common/x86/ihevc_intra_pred_filters_sse42_intr.c +++ b/common/x86/ihevc_intra_pred_filters_sse42_intr.c @@ -91,6 +91,11 @@ * @param[in] mode * integer intraprediction mode * +* @param[in] intra_smoothing_flags +* integer bit 3 indicates if intra smoothing is enabled/disabled +* unconditionally. 
this is applicable to frext profiles only +* bit 0 indicates strong intra smoothing enabled/disabled +* * @returns * * @remarks @@ -104,7 +109,7 @@ void ihevc_intra_pred_ref_filtering_sse42(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst, WORD32 mode, - WORD32 strong_intra_smoothing_enable_flag) + WORD32 intra_smoothing_flags) { WORD32 filter_flag; WORD32 i; /* Generic indexing variable */ @@ -117,11 +122,10 @@ void ihevc_intra_pred_ref_filtering_sse42(UWORD8 *pu1_src, __m128i src_temp1, src_temp2, src_temp3, src_temp7; __m128i src_temp4, src_temp5, src_temp6, src_temp8; - //WORD32 strong_intra_smoothing_enable_flag = 1; - - - - filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)); + WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3); + WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1; + filter_flag = intra_smoothing_disabled ? + 0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2))); if(0 == filter_flag) { if(pu1_src == pu1_dst) diff --git a/common/x86/ihevc_intra_pred_filters_ssse3_intr.c b/common/x86/ihevc_intra_pred_filters_ssse3_intr.c index 8441a8a..ec479d2 100644 --- a/common/x86/ihevc_intra_pred_filters_ssse3_intr.c +++ b/common/x86/ihevc_intra_pred_filters_ssse3_intr.c @@ -377,6 +377,11 @@ void ihevc_intra_pred_luma_ref_substitution_ssse3(UWORD8 *pu1_top_left, * @param[in] mode * integer intraprediction mode * +* @param[in] intra_smoothing_flags +* integer bit 3 indicates if intra smoothing is enabled/disabled +* unconditionally. 
this is applicable to frext profiles only +* bit 0 indicates strong intra smoothing enabled/disabled +* * @returns * * @remarks @@ -389,7 +394,7 @@ void ihevc_intra_pred_ref_filtering_ssse3(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst, WORD32 mode, - WORD32 strong_intra_smoothing_enable_flag) + WORD32 intra_smoothing_flags) { WORD32 filter_flag; WORD32 i; /* Generic indexing variable */ @@ -402,9 +407,10 @@ void ihevc_intra_pred_ref_filtering_ssse3(UWORD8 *pu1_src, __m128i src_temp1, src_temp2, src_temp3, src_temp7; __m128i src_temp4, src_temp5, src_temp6, src_temp8; - //WORD32 strong_intra_smoothing_enable_flag = 1; - - filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)); + WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3); + WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1; + filter_flag = intra_smoothing_disabled ? + 0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2))); if(0 == filter_flag) { if(pu1_src == pu1_dst) diff --git a/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s b/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s deleted file mode 100644 index caf7123..0000000 --- a/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s +++ /dev/null @@ -1,453 +0,0 @@ -@/***************************************************************************** -@* -@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore -@* -@* Licensed under the Apache License, Version 2.0 (the "License"); -@* you may not use this file except in compliance with the License. -@* You may obtain a copy of the License at: -@* -@* http://www.apache.org/licenses/LICENSE-2.0 -@* -@* Unless required by applicable law or agreed to in writing, software -@* distributed under the License is distributed on an "AS IS" BASIS, -@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@* See the License for the specific language governing permissions and -@* limitations under the License. 
-@* -@*****************************************************************************/ -@/** -@/******************************************************************************* -@* @file -@* ihevcd_fmt_conv_420sp_to_rgba8888.s -@* -@* @brief -@* contains function definitions for format conversions -@* -@* @author -@* ittiam -@* -@* @par list of functions: -@* -@* -@* @remarks -@* none -@* -@*******************************************************************************/ - .equ DO1STROUNDING, 0 - - @ ARM - @ - @ PRESERVE8 - -.text -.p2align 2 - - - - -@/***************************************************************************** -@* * -@* Function Name : ihevcd_fmt_conv_420sp_to_rgba8888() * -@* * -@* Description : This function conversts the image from YUV422 color * -@* space to RGB888 color space. The function can be * -@* invoked at the MB level. * -@* * -@* Arguments : R0 pubY * -@* R1 pubUV * -@* R2 pusRGB * -@* R3 pusRGB * -@* [R13 #40] usHeight * -@* [R13 #44] usWidth * -@* [R13 #48] usStrideY * -@* [R13 #52] usStrideU * -@* [R13 #56] usStrideV * -@* [R13 #60] usStrideRGB * -@* * -@* Values Returned : None * -@* * -@* Register Usage : R0 - R14 * -@* * -@* Stack Usage : 104 Bytes * -@* * -@* Interruptibility : Interruptible * -@* * -@* Known Limitations * -@* Assumptions: Image Width: Assumed to be multiple of 16 and * -@* greater than or equal to 16 * -@* Image Height: Assumed to be even. 
* -@* * -@* Revision History : * -@* DD MM YYYY Author(s) Changes (Describe the changes made) * -@* 07 06 2010 Varshita Draft * -@* 07 06 2010 Naveen Kr T Completed * -@* 05 08 2013 Naveen K P Modified for HEVC * -@* 30 10 2018 Saurabh Sood Store D registers to stack * -@*****************************************************************************/ - .global ihevcd_fmt_conv_420sp_to_rgba8888_a9q -.type ihevcd_fmt_conv_420sp_to_rgba8888_a9q, function -ihevcd_fmt_conv_420sp_to_rgba8888_a9q: - - @// push the registers on the stack - STMFD SP!,{R4-R12,LR} - VPUSH {d8-d15} - - @//R0 - Y PTR - @//R1 - UV PTR - @//R2 - RGB PTR - @//R3 - RGB PTR - @//R4 - PIC WIDTH - @//R5 - PIC HT - @//R6 - STRIDE Y - @//R7 - STRIDE U - @//R8 - STRIDE V - @//R9 - STRIDE RGB - - @//ONE ROW PROCESSING AT A TIME - - @//THE FOUR CONSTANTS ARE: - @//C1=0x3311,C2=0xF379,C3=0xE5F8,C4=0x4092 - - @PLD [R0] - @PLD [R1] - @PLD [R2] - - - @/* can be loaded from a defined const type */ - MOVW R10,#0x3311 - VMOV.16 D0[0],R10 @//C1 - - MOVW R10,#0xF379 - VMOV.16 D0[1],R10 @//C2 - - MOVW R10,#0xE5F8 - VMOV.16 D0[2],R10 @//C3 - - MOVW R10,#0x4092 - VMOV.16 D0[3],R10 @//C4 - - @//LOAD CONSTANT 128 INTO A CORTEX REGISTER - MOV R10,#128 - VDUP.8 D1,R10 - - @//D0 HAS C1-C2-C3-C4 - @// load other parameters from stack - LDR R5,[sp,#104] - @LDR R4,[sp,#44] - LDR R6,[sp,#108] - LDR R7,[sp,#112] - @LDR R8,[sp,#52] - LDR R9,[sp,#116] - - @// calculate offsets, offset = stride - width - SUB R10,R6,R3 @// luma offset - SUB R11,R7,R3 - @, LSR #1 @// u offset - @SUB R12,R8,R3, LSR #1 @// v offset - SUB R14,R9,R3 @// rgb offset in pixels - - @// calculate height loop count - MOV R5,R5, LSR #1 @// height_cnt = height / 16 - - @// create next row pointers for rgb and luma data - ADD R7,R0,R6 @// luma_next_row = luma + luma_stride - ADD R8,R2,R9,LSL #2 @// rgb_next_row = rgb + rgb_stride - -LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP: - - @//LOAD VALUES OF U&V AND COMPUTE THE R,G,B WEIGHT VALUES. - VLD1.8 {D2,D3},[R1]! 
@//LOAD 8 VALUES OF UV - @//VLD1.8 {D3},[R2]! @//LOAD 8 VALUES OF V - - @// calculate width loop count - MOV R6,R3, LSR #4 @// width_cnt = width / 16 - - @//COMPUTE THE ACTUAL RGB VALUES,WE CAN DO TWO ROWS AT A TIME - @//LOAD VALUES OF Y 8-BIT VALUES - VLD2.8 {D30,D31},[R0]! @//D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row 1 - @//D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - VLD2.8 {D28,D29},[R7]! @//D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row2 - @//D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - - SUBS R6,R6,#1 - BEQ LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP - -LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP: - @VMOV.I8 Q1,#128 - VUZP.8 D2,D3 - - - @//NEED TO SUBTRACT (U-128) AND (V-128) - @//(D2-D1),(D3-D1) - VSUBL.U8 Q2,D2,D1 @//(U-128) - VSUBL.U8 Q3,D3,D1 @//(V-128) - - @//LOAD VALUES OF U&V for next row - VLD1.8 {D2,D3},[R1]! @//LOAD 8 VALUES OF U - @//VLD1.8 {D3},[R2]! @//LOAD 8 VALUES OF V - - @PLD [R0] - PLD [R1] - - @//NEED TO MULTIPLY WITH Q2,Q3 WITH CO-EEFICIENTS - VMULL.S16 Q4,D4,D0[3] @//(U-128)*C4 FOR B - VMULL.S16 Q5,D5,D0[3] @//(U-128)*C4 FOR B - - VMULL.S16 Q10,D6,D0[0] @//(V-128)*C1 FOR R - VMULL.S16 Q11,D7,D0[0] @//(V-128)*C1 FOR R - - VMULL.S16 Q6,D4,D0[1] @//(U-128)*C2 FOR G - VMLAL.S16 Q6,D6,D0[2] @//Q6 = (U-128)*C2 + (V-128)*C3 - VMULL.S16 Q7,D5,D0[1] @//(U-128)*C2 FOR G - VMLAL.S16 Q7,D7,D0[2] @//Q7 = (U-128)*C2 + (V-128)*C3 - - @//NARROW RIGHT SHIFT BY 13 FOR R&B - VQSHRN.S32 D8,Q4,#13 @//D8 = (U-128)*C4>>13 4 16-BIT VALUES - VQSHRN.S32 D9,Q5,#13 @//D9 = (U-128)*C4>>13 4 16-BIT VALUES - @//Q4 - WEIGHT FOR B - - @//NARROW RIGHT SHIFT BY 13 FOR R&B - VQSHRN.S32 D10,Q10,#13 @//D10 = (V-128)*C1>>13 4 16-BIT VALUES - VQSHRN.S32 D11,Q11,#13 @//D11 = (V-128)*C1>>13 4 16-BIT VALUES - @//Q5 - WEIGHT FOR R - - @//NARROW RIGHT SHIFT BY 13 FOR G - VQSHRN.S32 D12,Q6,#13 @//D12 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - VQSHRN.S32 D13,Q7,#13 @//D13 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - @//Q6 - WEIGHT FOR G - - VADDW.U8 Q7,Q4,D30 @//Q7 - HAS Y + B - VADDW.U8 Q8,Q5,D30 @//Q8 - HAS Y + R - 
VADDW.U8 Q9,Q6,D30 @//Q9 - HAS Y + G - - VADDW.U8 Q10,Q4,D31 @//Q10 - HAS Y + B - VADDW.U8 Q11,Q5,D31 @//Q11 - HAS Y + R - VADDW.U8 Q12,Q6,D31 @//Q12 - HAS Y + G - - VQMOVUN.S16 D14,Q7 - VQMOVUN.S16 D15,Q9 - VQMOVUN.S16 D16,Q8 - VMOV.I8 D17,#0 - - VZIP.8 D14,D15 - VZIP.8 D16,D17 - VZIP.16 Q7,Q8 - - - VQMOVUN.S16 D20,Q10 - VQMOVUN.S16 D21,Q12 - VQMOVUN.S16 D22,Q11 - VMOV.I8 D23,#0 - - VZIP.8 D20,D21 - VZIP.8 D22,D23 - VZIP.16 Q10,Q11 - - VZIP.32 Q7,Q10 - VZIP.32 Q8,Q11 - - VST1.32 D14,[R2]! - VST1.32 D15,[R2]! - VST1.32 D20,[R2]! - VST1.32 D21,[R2]! - VST1.32 D16,[R2]! - VST1.32 D17,[R2]! - VST1.32 D22,[R2]! - VST1.32 D23,[R2]! - - @//D14-D20 - TOALLY HAVE 16 VALUES - @//WE NEED TO SHIFT R,G,B VALUES TO GET 5BIT,6BIT AND 5BIT COMBINATIONS - VADDW.U8 Q7,Q4,D28 @//Q7 - HAS Y + B - VADDW.U8 Q8,Q5,D28 @//Q2 - HAS Y + R - VADDW.U8 Q9,Q6,D28 @//Q3 - HAS Y + G - - VADDW.U8 Q10,Q4,D29 @//Q10 - HAS Y + B - VADDW.U8 Q11,Q5,D29 @//Q11 - HAS Y + R - VADDW.U8 Q12,Q6,D29 @//Q12 - HAS Y + G - - @//COMPUTE THE ACTUAL RGB VALUES,WE CAN DO TWO ROWS AT A TIME - @//LOAD VALUES OF Y 8-BIT VALUES - VLD2.8 {D30,D31},[R0]! @//D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row 1 - @//D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - VLD2.8 {D28,D29},[R7]! @//D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row2 - @//D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - - PLD [R0] - PLD [R7] - - VQMOVUN.S16 D14,Q7 - VQMOVUN.S16 D15,Q9 - VQMOVUN.S16 D16,Q8 - VMOV.I8 D17,#0 - - VZIP.8 D14,D15 - VZIP.8 D16,D17 - VZIP.16 Q7,Q8 - - - VQMOVUN.S16 D20,Q10 - VQMOVUN.S16 D21,Q12 - VQMOVUN.S16 D22,Q11 - VMOV.I8 D23,#0 - - VZIP.8 D20,D21 - VZIP.8 D22,D23 - VZIP.16 Q10,Q11 - - VZIP.32 Q7,Q10 - VZIP.32 Q8,Q11 - - VST1.32 D14,[R8]! - VST1.32 D15,[R8]! - VST1.32 D20,[R8]! - VST1.32 D21,[R8]! - VST1.32 D16,[R8]! - VST1.32 D17,[R8]! - VST1.32 D22,[R8]! - VST1.32 D23,[R8]! 
- - SUBS R6,R6,#1 @// width_cnt -= 1 - BNE LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP - -LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP: - @VMOV.I8 Q1,#128 - VUZP.8 D2,D3 - - - @//NEED TO SUBTRACT (U-128) AND (V-128) - @//(D2-D1),(D3-D1) - VSUBL.U8 Q2,D2,D1 @//(U-128) - VSUBL.U8 Q3,D3,D1 @//(V-128) - - - @//NEED TO MULTIPLY WITH Q2,Q3 WITH CO-EEFICIENTS - VMULL.S16 Q4,D4,D0[3] @//(U-128)*C4 FOR B - VMULL.S16 Q5,D5,D0[3] @//(U-128)*C4 FOR B - - VMULL.S16 Q10,D6,D0[0] @//(V-128)*C1 FOR R - VMULL.S16 Q11,D7,D0[0] @//(V-128)*C1 FOR R - - VMULL.S16 Q6,D4,D0[1] @//(U-128)*C2 FOR G - VMLAL.S16 Q6,D6,D0[2] @//Q6 = (U-128)*C2 + (V-128)*C3 - VMULL.S16 Q7,D5,D0[1] @//(U-128)*C2 FOR G - VMLAL.S16 Q7,D7,D0[2] @//Q7 = (U-128)*C2 + (V-128)*C3 - - @//NARROW RIGHT SHIFT BY 13 FOR R&B - VQSHRN.S32 D8,Q4,#13 @//D8 = (U-128)*C4>>13 4 16-BIT VALUES - VQSHRN.S32 D9,Q5,#13 @//D9 = (U-128)*C4>>13 4 16-BIT VALUES - @//Q4 - WEIGHT FOR B - - @//NARROW RIGHT SHIFT BY 13 FOR R&B - VQSHRN.S32 D10,Q10,#13 @//D10 = (V-128)*C1>>13 4 16-BIT VALUES - VQSHRN.S32 D11,Q11,#13 @//D11 = (V-128)*C1>>13 4 16-BIT VALUES - @//Q5 - WEIGHT FOR R - - @//NARROW RIGHT SHIFT BY 13 FOR G - VQSHRN.S32 D12,Q6,#13 @//D12 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - VQSHRN.S32 D13,Q7,#13 @//D13 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - @//Q6 - WEIGHT FOR G - - VADDW.U8 Q7,Q4,D30 @//Q7 - HAS Y + B - VADDW.U8 Q8,Q5,D30 @//Q8 - HAS Y + R - VADDW.U8 Q9,Q6,D30 @//Q9 - HAS Y + G - - VADDW.U8 Q10,Q4,D31 @//Q10 - HAS Y + B - VADDW.U8 Q11,Q5,D31 @//Q11 - HAS Y + R - VADDW.U8 Q12,Q6,D31 @//Q12 - HAS Y + G - - VQMOVUN.S16 D14,Q7 - VQMOVUN.S16 D15,Q9 - VQMOVUN.S16 D16,Q8 - VMOV.I8 D17,#0 - - VZIP.8 D14,D15 - VZIP.8 D16,D17 - VZIP.16 Q7,Q8 - - - VQMOVUN.S16 D20,Q10 - VQMOVUN.S16 D21,Q12 - VQMOVUN.S16 D22,Q11 - VMOV.I8 D23,#0 - - VZIP.8 D20,D21 - VZIP.8 D22,D23 - VZIP.16 Q10,Q11 - - VZIP.32 Q7,Q10 - VZIP.32 Q8,Q11 - - VST1.32 D14,[R2]! - VST1.32 D15,[R2]! - VST1.32 D20,[R2]! - VST1.32 D21,[R2]! - VST1.32 D16,[R2]! 
- VST1.32 D17,[R2]! - VST1.32 D22,[R2]! - VST1.32 D23,[R2]! - - @//D14-D20 - TOALLY HAVE 16 VALUES - @//WE NEED TO SHIFT R,G,B VALUES TO GET 5BIT,6BIT AND 5BIT COMBINATIONS - VADDW.U8 Q7,Q4,D28 @//Q7 - HAS Y + B - VADDW.U8 Q8,Q5,D28 @//Q2 - HAS Y + R - VADDW.U8 Q9,Q6,D28 @//Q3 - HAS Y + G - - VADDW.U8 Q10,Q4,D29 @//Q10 - HAS Y + B - VADDW.U8 Q11,Q5,D29 @//Q11 - HAS Y + R - VADDW.U8 Q12,Q6,D29 @//Q12 - HAS Y + G - - - VQMOVUN.S16 D14,Q7 - VQMOVUN.S16 D15,Q9 - VQMOVUN.S16 D16,Q8 - VMOV.I8 D17,#0 - - VZIP.8 D14,D15 - VZIP.8 D16,D17 - VZIP.16 Q7,Q8 - - - VQMOVUN.S16 D20,Q10 - VQMOVUN.S16 D21,Q12 - VQMOVUN.S16 D22,Q11 - VMOV.I8 D23,#0 - - VZIP.8 D20,D21 - VZIP.8 D22,D23 - VZIP.16 Q10,Q11 - - VZIP.32 Q7,Q10 - VZIP.32 Q8,Q11 - - VST1.32 D14,[R8]! - VST1.32 D15,[R8]! - VST1.32 D20,[R8]! - VST1.32 D21,[R8]! - VST1.32 D16,[R8]! - VST1.32 D17,[R8]! - VST1.32 D22,[R8]! - VST1.32 D23,[R8]! - - @// Adjust the address pointers - ADD R0,R7,R10 @// luma = luma_next + offset - ADD R2,R8,R14,LSL #2 @// rgb = rgb_next + offset - - ADD R7,R0,R3 @// luma_next = luma + width - ADD R8,R2,R3,LSL #2 @// rgb_next_row = rgb + width - - ADD R1,R1,R11 @// adjust u pointer - @ADD R2,R2,R12 @// adjust v pointer - - ADD R7,R7,R10 @// luma_next = luma + width + offset (because of register crunch) - ADD R8,R8,R14,LSL #2 @// rgb_next_row = rgb + width + offset - - SUBS R5,R5,#1 @// height_cnt -= 1 - - BNE LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP - - @//POP THE REGISTERS - VPOP {d8-d15} - LDMFD SP!,{R4-R12,PC} - - - .section .note.GNU-stack,"",%progbits diff --git a/decoder/arm/ihevcd_function_selector.c b/decoder/arm/ihevcd_function_selector.c index 66c7d4d..129af63 100644 --- a/decoder/arm/ihevcd_function_selector.c +++ b/decoder/arm/ihevcd_function_selector.c @@ -58,10 +58,6 @@ #include "ihevcd_function_selector.h" #include "ihevcd_structs.h" -void ihevcd_init_function_ptr_neonintr(codec_t *ps_codec); -void ihevcd_init_function_ptr_noneon(codec_t *ps_codec); -void ihevcd_init_function_ptr_a9q(codec_t 
*ps_codec); -void ihevcd_init_function_ptr_av8(codec_t *ps_codec); void ihevcd_init_function_ptr(void *pv_codec) { codec_t *ps_codec = (codec_t *)pv_codec; @@ -71,11 +67,11 @@ void ihevcd_init_function_ptr(void *pv_codec) { #ifndef DISABLE_NEONINTR case ARCH_ARM_NEONINTR: - ihevcd_init_function_ptr_neonintr(ps_codec); + ihevcd_init_function_ptr_neonintr(&ps_codec->s_func_selector); break; #endif case ARCH_ARM_NONEON: - ihevcd_init_function_ptr_noneon(ps_codec); + ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector); break; default: case ARCH_ARM_A5: @@ -84,9 +80,9 @@ void ihevcd_init_function_ptr(void *pv_codec) case ARCH_ARM_A15: case ARCH_ARM_A9Q: #ifndef DISABLE_NEON - ihevcd_init_function_ptr_a9q(ps_codec); + ihevcd_init_function_ptr_a9q(&ps_codec->s_func_selector); #else - ihevcd_init_function_ptr_noneon(ps_codec); + ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector); #endif break; } @@ -106,12 +102,17 @@ void ihevcd_init_function_ptr(void *pv_codec) switch(ps_codec->e_processor_arch) { case ARCH_ARM_NONEON: - ihevcd_init_function_ptr_noneon(ps_codec); + ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector); break; case ARCH_ARMV8_GENERIC: default: - ihevcd_init_function_ptr_av8(ps_codec); +#ifdef DARWIN + ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector); break; +#else + ihevcd_init_function_ptr_av8(&ps_codec->s_func_selector); + break; +#endif } #endif } diff --git a/decoder/arm/ihevcd_function_selector_a9q.c b/decoder/arm/ihevcd_function_selector_a9q.c index ea5b8c0..a22a925 100644 --- a/decoder/arm/ihevcd_function_selector_a9q.c +++ b/decoder/arm/ihevcd_function_selector_a9q.c @@ -54,107 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_a9q(codec_t *ps_codec) +void ihevcd_init_function_ptr_a9q(func_selector_t *ps_func_selector) { - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = 
&ihevc_deblk_chroma_horz_a9q; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_a9q; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_a9q; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_a9q; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = 
&ihevc_inter_pred_luma_vert_w16inp_w16out_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_neonintr; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = 
&ihevc_intra_pred_luma_horz_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_a9q; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_a9q; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_a9q; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_a9q; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_a9q; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_a9q; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_a9q; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = 
&ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8_a9q; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy_a9q; - ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8_a9q; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset_a9q; - ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8_a9q; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit_a9q; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma_a9q; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma_a9q; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma_a9q; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma_a9q; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_a9q; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_a9q; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_a9q; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_neonintr; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_neonintr; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_neonintr; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_a9q; - ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = 
&ihevc_sao_band_offset_chroma_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_a9q; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_a9q; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888_a9q; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_a9q; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_a9q; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_a9q; + ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_a9q; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_a9q; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_a9q; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_a9q; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_a9q; + ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = 
&ihevc_inter_pred_chroma_copy_w16out_a9q; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_a9q; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_a9q; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_a9q; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_a9q; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_a9q; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_a9q; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_a9q; + ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_a9q; + ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_a9q; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_a9q; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_a9q; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_a9q; + ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_a9q; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_a9q; + ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution_a9q; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution_a9q; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_neonintr; + ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + 
ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_a9q; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_a9q; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_a9q; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_a9q; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_a9q; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_a9q; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_a9q; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_a9q; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_a9q; + ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_a9q; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_a9q; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_a9q; + ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_a9q; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_a9q; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_a9q; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_a9q; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_a9q; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_a9q; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_a9q; + ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_a9q; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = 
&ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_a9q; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_a9q; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_a9q; + ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_a9q; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_a9q; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; 
+ ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8_a9q; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy_a9q; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8_a9q; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset_a9q; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8_a9q; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit_a9q; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma_a9q; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma_a9q; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma_a9q; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma_a9q; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_a9q; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_a9q; + ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_a9q; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_neonintr; + ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_neonintr; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_neonintr; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_a9q; + ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_a9q; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_a9q; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_a9q; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_a9q; + ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_a9q; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = 
&ihevc_sao_edge_offset_class2_a9q; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_a9q; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_a9q; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_a9q; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_a9q; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_a9q; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_a9q; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/decoder/arm/ihevcd_function_selector_noneon.c b/decoder/arm/ihevcd_function_selector_noneon.c index b5c9f6a..b545587 100644 --- a/decoder/arm/ihevcd_function_selector_noneon.c +++ b/decoder/arm/ihevcd_function_selector_noneon.c @@ -54,107 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_noneon(codec_t *ps_codec) +void ihevcd_init_function_ptr_noneon(func_selector_t *ps_func_selector) { - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out; - 
ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc; - 
ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc; - ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = 
&ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy; - ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset; - 
ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma; - ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3; - 
ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma; + ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy; + ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz; + ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert; + 
ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out; + ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out; + ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17; + 
ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25; + ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar; + ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8; + 
ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; + ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default; 
+ ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi; + ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma; + ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1; + ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/decoder/arm64/ihevcd_fmt_conv_420sp_to_rgba8888.s b/decoder/arm64/ihevcd_fmt_conv_420sp_to_rgba8888.s deleted file mode 100644 index 026b65f..0000000 --- 
a/decoder/arm64/ihevcd_fmt_conv_420sp_to_rgba8888.s +++ /dev/null @@ -1,528 +0,0 @@ -///***************************************************************************** -//* -//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore -//* -//* Licensed under the Apache License, Version 2.0 (the "License"); -//* you may not use this file except in compliance with the License. -//* You may obtain a copy of the License at: -//* -//* http://www.apache.org/licenses/LICENSE-2.0 -//* -//* Unless required by applicable law or agreed to in writing, software -//* distributed under the License is distributed on an "AS IS" BASIS, -//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -//* See the License for the specific language governing permissions and -//* limitations under the License. -//* -//*****************************************************************************/ -///** -///******************************************************************************* -//* //file -//* ihevcd_fmt_conv_420sp_to_rgba8888.s -//* -//* //brief -//* contains function definitions for format conversions -//* -//* //author -//* ittiam -//* -//* //par list of functions: -//* -//* -//* //remarks -//* none -//* -//*******************************************************************************/ - - .equ DO1STROUNDING, 0 - - // ARM - // - // PRESERVE8 - -.text -.p2align 2 - -.include "ihevc_neon_macros.s" - - - -///***************************************************************************** -//* * -//* Function Name : ihevcd_fmt_conv_420sp_to_rgba8888() * -//* * -//* Description : This function conversts the image from YUV422 color * -//* space to RGB888 color space. The function can be * -//* invoked at the MB level. 
* -//* * -//* Arguments : x0 pubY * -//* x1 pubUV * -//* x2 pusRGB * -//* x3 pusRGB * -//* [x13 #40] usHeight * -//* [x13 #44] usWidth * -//* [x13 #48] usStrideY * -//* [x13 #52] usStrideU * -//* [x13 #56] usStrideV * -//* [x13 #60] usStrideRGB * -//* * -//* Values Returned : None * -//* * -//* Register Usage : x0 - x14 * -//* * -//* Stack Usage : 40 Bytes * -//* * -//* Interruptibility : Interruptible * -//* * -//* Known Limitations * -//* Assumptions: Image Width: Assumed to be multiple of 16 and * -//* greater than or equal to 16 * -//* Image Height: Assumed to be even. * -//* * -//* Revision History : * -//* DD MM YYYY Author(s) Changes (Describe the changes made) * -//* 07 06 2010 Varshita Draft * -//* 07 06 2010 Naveen Kr T Completed * -//* 05 08 2013 Naveen K P Modified for HEVC * -//*****************************************************************************/ - .global ihevcd_fmt_conv_420sp_to_rgba8888_av8 -.type ihevcd_fmt_conv_420sp_to_rgba8888_av8, function -ihevcd_fmt_conv_420sp_to_rgba8888_av8: - - //// push the registers on the stack - // STMFD sp!,{x4-x12,x14} - - stp d12,d14,[sp,#-16]! - stp d8,d15,[sp,#-16]! // Storing d15 using { sub sp,sp,#8; str d15,[sp] } is giving bus error. - // d8 is used as dummy register and stored along with d15 using stp. d8 is not used in the function. - stp x19, x20,[sp,#-16]! 
- - - ////x0 - Y PTR - ////x1 - UV PTR - ////x2 - RGB PTR - ////x3 - RGB PTR - ////x4 - PIC WIDTH - ////x5 - PIC HT - ////x6 - STRIDE Y - ////x7 - STRIDE U - ////x8 - STRIDE V - ////x9 - STRIDE RGB - - ////ONE ROW PROCESSING AT A TIME - - ////THE FOUR CONSTANTS ARE: - ////C1=0x3311,C2=0xF379,C3=0xE5F8,C4=0x4092 - - //PLD [x0] - //PLD [x1] - //PLD [x2] - - - ///* can be loaded from a defined const type */ - mov x10,#0x3311 - mov v0.h[0], w10 ////C1 - - mov x10,#0xF379 - mov v0.h[1], w10 ////C2 - - mov x10,#0xE5F8 - mov v0.h[2], w10 ////C3 - - mov x10,#0x4092 - mov v0.h[3], w10 ////C4 - - ////LOAD CONSTANT 128 INTO A CORTEX REGISTER - MOV x10,#128 - dup v1.8b,w10 - - ////D0 HAS C1-C2-C3-C4 - //// load other parameters from stack - mov x9, x7 - mov x7, x6 - mov x6, x5 - mov x5, x4 - //LDR x4,[sp,#44] - //LDR x8,[sp,#52] - - //// calculate offsets, offset = stride - width - SUB x10,x6,x3 //// luma offset - SUB x11,x7,x3 - //, LSR #1 @// u offset - //SUB x12,x8,x3, LSR #1 @// v offset - SUB x14,x9,x3 //// rgb offset in pixels - - //// calculate height loop count - LSR x5, x5, #1 //// height_cnt = height / 16 - - //// create next row pointers for rgb and luma data - ADD x7,x0,x6 //// luma_next_row = luma + luma_stride - ADD x8,x2,x9,LSL #2 //// rgb_next_row = rgb + rgb_stride - -LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP: - - ////LOAD VALUES OF U&V AND COMPUTE THE R,G,B WEIGHT VALUES. - LD1 {v2.8b, v3.8b},[x1],#16 ////LOAD 8 VALUES OF UV - ////VLD1.8 {D3},[x2]! 
@//LOAD 8 VALUES OF V - - //// calculate width loop count - LSR x6, x3, #4 //// width_cnt = width / 16 - - ////COMPUTE THE ACTUAL RGB VALUES,WE CAN DO TWO ROWS AT A TIME - ////LOAD VALUES OF Y 8-BIT VALUES - LD2 {v30.8b, v31.8b},[x0],#16 ////D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row 1 - ////D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - LD2 {v28.8b, v29.8b},[x7],#16 ////D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row2 - ////D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - - SUBS x6,x6,#1 - BEQ LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP - -LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP: - //VMOV.I8 Q1,#128 - UZP1 v27.8b, v2.8b, v3.8b - UZP2 v3.8b, v2.8b, v3.8b - mov v2.d[0], v27.d[0] - - ////NEED TO SUBTRACT (U-128) AND (V-128) - ////(D2-D1),(D3-D1) - uSUBL v4.8h, v2.8b, v1.8b ////(U-128) - uSUBL v6.8h, v3.8b, v1.8b ////(V-128) - - ////LOAD VALUES OF U&V for next row - LD1 {v2.8b, v3.8b},[x1],#16 ////LOAD 8 VALUES OF U - ////VLD1.8 {D3},[x2]! @//LOAD 8 VALUES OF V - - //PLD [x0] - prfm PLDL1KEEP,[x1] - - ////NEED TO MULTIPLY WITH Q2,Q3 WITH CO-EEFICIENTS - sMULL v5.4s, v4.4h, v0.h[3] ////(U-128)*C4 FOR B - sMULL2 v7.4s, v4.8h, v0.h[3] ////(U-128)*C4 FOR B - - sMULL v20.4s, v6.4h, v0.h[0] ////(V-128)*C1 FOR R - sMULL2 v22.4s, v6.8h, v0.h[0] ////(V-128)*C1 FOR R - - sMULL v12.4s, v4.4h, v0.h[1] ////(U-128)*C2 FOR G - sMLAL v12.4s, v6.4h, v0.h[2] ////Q6 = (U-128)*C2 + (V-128)*C3 - sMULL2 v14.4s, v4.8h, v0.h[1] ////(U-128)*C2 FOR G - sMLAL2 v14.4s, v6.8h, v0.h[2] ////Q7 = (U-128)*C2 + (V-128)*C3 - - ////NARROW RIGHT SHIFT BY 13 FOR R&B - sqshrn v5.4h, v5.4s,#13 ////D8 = (U-128)*C4>>13 4 16-BIT VALUES - sqshrn2 v5.8h, v7.4s,#13 ////D9 = (U-128)*C4>>13 4 16-BIT VALUES - ////Q4 - WEIGHT FOR B - - ////NARROW RIGHT SHIFT BY 13 FOR R&B - sqshrn v7.4h, v20.4s,#13 ////D10 = (V-128)*C1>>13 4 16-BIT VALUES - sqshrn2 v7.8h, v22.4s,#13 ////D11 = (V-128)*C1>>13 4 16-BIT VALUES - ////Q5 - WEIGHT FOR R - - ////NARROW RIGHT SHIFT BY 13 FOR G - sqshrn v12.4h, v12.4s,#13 ////D12 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - sqshrn2 v12.8h, 
v14.4s,#13 ////D13 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - ////Q6 - WEIGHT FOR G - - UADDW v14.8h, v5.8h , v30.8b ////Q7 - HAS Y + B - UADDW v16.8h, v7.8h , v30.8b ////Q8 - HAS Y + R - UADDW v18.8h, v12.8h , v30.8b ////Q9 - HAS Y + G - - UADDW v20.8h, v5.8h , v31.8b ////Q10 - HAS Y + B - UADDW v22.8h, v7.8h , v31.8b ////Q11 - HAS Y + R - UADDW v24.8h, v12.8h , v31.8b ////Q12 - HAS Y + G - - sqxtun v14.8b, v14.8h - sqxtun v15.8b, v18.8h - sqxtun v16.8b, v16.8h - movi v17.8b, #0 - - sqxtun v20.8b, v20.8h - sqxtun v21.8b, v24.8h - sqxtun v22.8b, v22.8h - movi v23.8b, #0 - - ZIP1 v27.8b, v14.8b, v15.8b - ZIP2 v15.8b, v14.8b, v15.8b - mov v14.d[0], v27.d[0] - ZIP1 v27.8b, v16.8b, v17.8b - ZIP2 v17.8b, v16.8b, v17.8b - mov v16.d[0], v27.d[0] - - ZIP1 v27.8b, v20.8b, v21.8b - ZIP2 v21.8b, v20.8b, v21.8b - mov v20.d[0], v27.d[0] - ZIP1 v27.8b, v22.8b, v23.8b - ZIP2 v23.8b, v22.8b, v23.8b - mov v22.d[0], v27.d[0] - - mov v14.d[1], v15.d[0] - mov v20.d[1], v21.d[0] - mov v16.d[1], v17.d[0] - mov v22.d[1], v23.d[0] - - ZIP1 v27.8h, v14.8h, v16.8h - ZIP2 v26.8h, v14.8h, v16.8h - - ZIP1 v25.8h, v20.8h, v22.8h - ZIP2 v19.8h, v20.8h, v22.8h - - ZIP1 v14.4s, v27.4s, v25.4s - ZIP2 v20.4s, v27.4s, v25.4s - - ZIP1 v16.4s, v26.4s, v19.4s - ZIP2 v22.4s, v26.4s, v19.4s - - ST1 {v14.4s},[x2],#16 - ST1 {v20.4s},[x2],#16 - ST1 {v16.4s},[x2],#16 - ST1 {v22.4s},[x2],#16 - - ////D14-D20 - TOALLY HAVE 16 VALUES - ////WE NEED TO SHIFT R,G,B VALUES TO GET 5BIT,6BIT AND 5BIT COMBINATIONS - UADDW v14.8h, v5.8h , v28.8b ////Q7 - HAS Y + B - UADDW v16.8h, v7.8h , v28.8b ////Q2 - HAS Y + R - UADDW v18.8h, v12.8h , v28.8b ////Q3 - HAS Y + G - - UADDW v20.8h, v5.8h , v29.8b ////Q10 - HAS Y + B - UADDW v22.8h, v7.8h , v29.8b ////Q11 - HAS Y + R - UADDW v24.8h, v12.8h , v29.8b ////Q12 - HAS Y + G - - ////COMPUTE THE ACTUAL RGB VALUES,WE CAN DO TWO ROWS AT A TIME - ////LOAD VALUES OF Y 8-BIT VALUES - LD2 {v30.8b, v31.8b},[x0],#16 ////D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row 1 - ////D1 - 
Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - LD2 {v28.8b, v29.8b},[x7],#16 ////D0 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row2 - ////D1 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15 - - prfm PLDL1KEEP,[x0] - prfm PLDL1KEEP,[x7] - - sqxtun v14.8b, v14.8h - sqxtun v15.8b, v18.8h - sqxtun v16.8b, v16.8h - movi v17.8b, #0 - - sqxtun v20.8b, v20.8h - sqxtun v21.8b, v24.8h - sqxtun v22.8b, v22.8h - movi v23.8b, #0 - - ZIP1 v27.8b, v14.8b, v15.8b - ZIP2 v15.8b, v14.8b, v15.8b - mov v14.d[0], v27.d[0] - ZIP1 v27.8b, v16.8b, v17.8b - ZIP2 v17.8b, v16.8b, v17.8b - mov v16.d[0], v27.d[0] - - ZIP1 v27.8b, v20.8b, v21.8b - ZIP2 v21.8b, v20.8b, v21.8b - mov v20.d[0], v27.d[0] - ZIP1 v27.8b, v22.8b, v23.8b - ZIP2 v23.8b, v22.8b, v23.8b - mov v22.d[0], v27.d[0] - - mov v14.d[1], v15.d[0] - mov v20.d[1], v21.d[0] - mov v16.d[1], v17.d[0] - mov v22.d[1], v23.d[0] - - ZIP1 v27.8h, v14.8h, v16.8h - ZIP2 v26.8h, v14.8h, v16.8h - - ZIP1 v25.8h, v20.8h, v22.8h - ZIP2 v19.8h, v20.8h, v22.8h - - ZIP1 v14.4s, v27.4s, v25.4s - ZIP2 v20.4s, v27.4s, v25.4s - - ZIP1 v16.4s, v26.4s, v19.4s - ZIP2 v22.4s, v26.4s, v19.4s - - ST1 {v14.4s},[x8],#16 - ST1 {v20.4s},[x8],#16 - ST1 {v16.4s},[x8],#16 - ST1 {v22.4s},[x8],#16 - - SUBS x6,x6,#1 //// width_cnt -= 1 - BNE LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP - -LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP: - //VMOV.I8 Q1,#128 - UZP1 v27.8b, v2.8b, v3.8b - UZP2 v3.8b, v2.8b, v3.8b - mov v2.d[0], v27.d[0] - - - ////NEED TO SUBTRACT (U-128) AND (V-128) - ////(D2-D1),(D3-D1) - uSUBL v4.8h, v2.8b, v1.8b ////(U-128) - uSUBL v6.8h, v3.8b, v1.8b ////(V-128) - - - ////NEED TO MULTIPLY WITH Q2,Q3 WITH CO-EEFICIENTS - sMULL v5.4s, v4.4h, v0.h[3] ////(U-128)*C4 FOR B - sMULL2 v7.4s, v4.8h, v0.h[3] ////(U-128)*C4 FOR B - - sMULL v20.4s, v6.4h, v0.h[0] ////(V-128)*C1 FOR R - sMULL2 v22.4s, v6.8h, v0.h[0] ////(V-128)*C1 FOR R - - sMULL v12.4s, v4.4h, v0.h[1] ////(U-128)*C2 FOR G - sMLAL v12.4s, v6.4h, v0.h[2] ////Q6 = (U-128)*C2 + (V-128)*C3 - sMULL2 v14.4s, v4.8h, v0.h[1] ////(U-128)*C2 FOR G - sMLAL2 v14.4s, v6.8h, 
v0.h[2] ////Q7 = (U-128)*C2 + (V-128)*C3 - - ////NARROW RIGHT SHIFT BY 13 FOR R&B - sqshrn v5.4h, v5.4s,#13 ////D8 = (U-128)*C4>>13 4 16-BIT VALUES - sqshrn2 v5.8h, v7.4s,#13 ////D9 = (U-128)*C4>>13 4 16-BIT VALUES - ////Q4 - WEIGHT FOR B - - ////NARROW RIGHT SHIFT BY 13 FOR R&B - sqshrn v7.4h, v20.4s,#13 ////D10 = (V-128)*C1>>13 4 16-BIT VALUES - sqshrn2 v7.8h, v22.4s,#13 ////D11 = (V-128)*C1>>13 4 16-BIT VALUES - ////Q5 - WEIGHT FOR R - - ////NARROW RIGHT SHIFT BY 13 FOR G - sqshrn v12.4h, v12.4s,#13 ////D12 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - sqshrn2 v12.8h, v14.4s,#13 ////D13 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES - ////Q6 - WEIGHT FOR G - - UADDW v14.8h, v5.8h , v30.8b ////Q7 - HAS Y + B - UADDW v16.8h, v7.8h , v30.8b ////Q8 - HAS Y + R - UADDW v18.8h, v12.8h , v30.8b ////Q9 - HAS Y + G - - UADDW v20.8h, v5.8h , v31.8b ////Q10 - HAS Y + B - UADDW v22.8h, v7.8h , v31.8b ////Q11 - HAS Y + R - UADDW v24.8h, v12.8h , v31.8b ////Q12 - HAS Y + G - - sqxtun v14.8b, v14.8h - sqxtun v15.8b, v18.8h - sqxtun v16.8b, v16.8h - movi v17.8b, #0 - - sqxtun v20.8b, v20.8h - sqxtun v21.8b, v24.8h - sqxtun v22.8b, v22.8h - movi v23.8b, #0 - - ZIP1 v27.8b, v14.8b, v15.8b - ZIP2 v15.8b, v14.8b, v15.8b - mov v14.d[0], v27.d[0] - ZIP1 v27.8b, v16.8b, v17.8b - ZIP2 v17.8b, v16.8b, v17.8b - mov v16.d[0], v27.d[0] - - ZIP1 v27.8b, v20.8b, v21.8b - ZIP2 v21.8b, v20.8b, v21.8b - mov v20.d[0], v27.d[0] - ZIP1 v27.8b, v22.8b, v23.8b - ZIP2 v23.8b, v22.8b, v23.8b - mov v22.d[0], v27.d[0] - - mov v14.d[1], v15.d[0] - mov v20.d[1], v21.d[0] - mov v16.d[1], v17.d[0] - mov v22.d[1], v23.d[0] - - ZIP1 v27.8h, v14.8h, v16.8h - ZIP2 v26.8h, v14.8h, v16.8h - - ZIP1 v25.8h, v20.8h, v22.8h - ZIP2 v19.8h, v20.8h, v22.8h - - ZIP1 v14.4s, v27.4s, v25.4s - ZIP2 v20.4s, v27.4s, v25.4s - - ZIP1 v16.4s, v26.4s, v19.4s - ZIP2 v22.4s, v26.4s, v19.4s - - ST1 {v14.4s},[x2],#16 - ST1 {v20.4s},[x2],#16 - ST1 {v16.4s},[x2],#16 - ST1 {v22.4s},[x2],#16 - - ////D14-D20 - TOALLY HAVE 16 VALUES - 
////WE NEED TO SHIFT R,G,B VALUES TO GET 5BIT,6BIT AND 5BIT COMBINATIONS - UADDW v14.8h, v5.8h , v28.8b ////Q7 - HAS Y + B - UADDW v16.8h, v7.8h , v28.8b ////Q2 - HAS Y + R - UADDW v18.8h, v12.8h , v28.8b ////Q3 - HAS Y + G - - UADDW v20.8h, v5.8h , v29.8b ////Q10 - HAS Y + B - UADDW v22.8h, v7.8h , v29.8b ////Q11 - HAS Y + R - UADDW v24.8h, v12.8h , v29.8b ////Q12 - HAS Y + G - - sqxtun v14.8b, v14.8h - sqxtun v15.8b, v18.8h - sqxtun v16.8b, v16.8h - movi v17.8b, #0 - - sqxtun v20.8b, v20.8h - sqxtun v21.8b, v24.8h - sqxtun v22.8b, v22.8h - movi v23.8b, #0 - - ZIP1 v27.8b, v14.8b, v15.8b - ZIP2 v15.8b, v14.8b, v15.8b - mov v14.d[0], v27.d[0] - ZIP1 v27.8b, v16.8b, v17.8b - ZIP2 v17.8b, v16.8b, v17.8b - mov v16.d[0], v27.d[0] - - ZIP1 v27.8b, v20.8b, v21.8b - ZIP2 v21.8b, v20.8b, v21.8b - mov v20.d[0], v27.d[0] - ZIP1 v27.8b, v22.8b, v23.8b - ZIP2 v23.8b, v22.8b, v23.8b - mov v22.d[0], v27.d[0] - - mov v14.d[1], v15.d[0] - mov v20.d[1], v21.d[0] - mov v16.d[1], v17.d[0] - mov v22.d[1], v23.d[0] - - ZIP1 v27.8h, v14.8h, v16.8h - ZIP2 v26.8h, v14.8h, v16.8h - - ZIP1 v25.8h, v20.8h, v22.8h - ZIP2 v19.8h, v20.8h, v22.8h - - ZIP1 v14.4s, v27.4s, v25.4s - ZIP2 v20.4s, v27.4s, v25.4s - - ZIP1 v16.4s, v26.4s, v19.4s - ZIP2 v22.4s, v26.4s, v19.4s - - ST1 {v14.4s},[x8],#16 - ST1 {v20.4s},[x8],#16 - ST1 {v16.4s},[x8],#16 - ST1 {v22.4s},[x8],#16 - - //// Adjust the address pointers - ADD x0,x7,x10 //// luma = luma_next + offset - ADD x2,x8,x14,LSL #2 //// rgb = rgb_next + offset - - ADD x7,x0,x3 //// luma_next = luma + width - ADD x8,x2,x3,LSL #2 //// rgb_next_row = rgb + width - - ADD x1,x1,x11 //// adjust u pointer - //ADD x2,x2,x12 @// adjust v pointer - - ADD x7,x7,x10 //// luma_next = luma + width + offset (because of register crunch) - ADD x8,x8,x14,LSL #2 //// rgb_next_row = rgb + width + offset - - SUBS x5,x5,#1 //// height_cnt -= 1 - - BNE LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP - - ////POP THE REGISTERS - // LDMFD sp!,{x4-x12,PC} - ldp x19, x20,[sp],#16 - ldp 
d8,d15,[sp],#16 // Loading d15 using { ldr d15,[sp]; add sp,sp,#8 } is giving bus error. - // d8 is used as dummy register and loaded along with d15 using ldp. d8 is not used in the function. - ldp d12,d14,[sp],#16 - ret - - - - - .section .note.GNU-stack,"",%progbits - diff --git a/decoder/arm64/ihevcd_function_selector_av8.c b/decoder/arm64/ihevcd_function_selector_av8.c index 210c730..0556b09 100644 --- a/decoder/arm64/ihevcd_function_selector_av8.c +++ b/decoder/arm64/ihevcd_function_selector_av8.c @@ -54,107 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_av8(codec_t *ps_codec) +void ihevcd_init_function_ptr_av8(func_selector_t *ps_func_selector) { - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_av8; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_av8; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_av8; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = 
&ihevc_inter_pred_chroma_vert_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_av8; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_neonintr; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_av8; - 
ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_av8; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_av8; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_av8; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_av8; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = 
&ihevc_itrans_recon_4x4_av8; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_av8; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_av8; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_av8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8_av8; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy_av8; - ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8_av8; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset_av8; - ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8_av8; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit_av8; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma_av8; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma_av8; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma_av8; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = 
&ihevc_pad_right_chroma_av8; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_av8; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_av8; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_av8; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_neonintr; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_neonintr; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_neonintr; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_av8; - ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_av8; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_av8; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888_av8; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp_av8; 
- ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_av8; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_av8; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_av8; + ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_av8; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_av8; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_av8; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_av8; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_av8; + ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_av8; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_av8; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_av8; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_av8; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_av8; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_av8; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_av8; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_av8; + ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_av8; + ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_av8; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_av8; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_av8; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_av8; + 
ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_av8; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_av8; + ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_neonintr; + ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_av8; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_av8; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_av8; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_av8; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_av8; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_av8; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_av8; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_av8; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_av8; + ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_av8; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_av8; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_av8; + 
ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_av8; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_av8; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_av8; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_av8; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_av8; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_av8; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_av8; + ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_av8; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_av8; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_av8; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_av8; + ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_av8; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_av8; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + 
ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; + ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8_av8; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy_av8; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8_av8; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset_av8; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8_av8; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit_av8; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma_av8; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma_av8; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma_av8; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma_av8; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_av8; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_av8; + ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_av8; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_neonintr; + 
ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_neonintr; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_neonintr; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_av8; + ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_av8; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_av8; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_av8; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_av8; + ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_av8; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_av8; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_av8; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_av8; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_av8; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp_av8; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_av8; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_av8; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_av8; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/decoder/ihevcd_api.c b/decoder/ihevcd_api.c index 031808d..7be6632 100644 --- a/decoder/ihevcd_api.c +++ b/decoder/ihevcd_api.c @@ -226,14 +226,16 @@ static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, if((ps_ip->s_ivd_create_ip_t.e_output_format != 
IV_YUV_420P) - && (ps_ip->s_ivd_create_ip_t.e_output_format - != IV_YUV_422ILE) - && (ps_ip->s_ivd_create_ip_t.e_output_format - != IV_RGB_565) && (ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420SP_UV) && (ps_ip->s_ivd_create_ip_t.e_output_format - != IV_YUV_420SP_VU)) + != IV_YUV_420SP_VU) + && (ps_ip->s_ivd_create_ip_t.e_output_format + != IV_GRAY) + && (ps_ip->s_ivd_create_ip_t.e_output_format + != IV_YUV_444P) + && (ps_ip->s_ivd_create_ip_t.e_output_format + != IV_YUV_422P)) { ps_op->s_ivd_create_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; @@ -243,6 +245,19 @@ static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, return (IV_FAIL); } + /* Shared display mode is supported only for 420SP and 420P formats */ + if((ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420P) + && (ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420SP_UV) + && (ps_ip->s_ivd_create_ip_t.e_output_format != IV_YUV_420SP_VU) + && ps_ip->s_ivd_create_ip_t.u4_share_disp_buf == 1) + { + ps_op->s_ivd_create_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_create_op_t.u4_error_code |= + IVD_INIT_DEC_FAILED; + return (IV_FAIL); + } + } break; @@ -768,6 +783,7 @@ static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, break; } +#ifndef DISABLE_SEI case IHEVCD_CXA_CMD_CTL_GET_SEI_MASTERING_PARAMS: { ihevcd_cxa_ctl_get_sei_mastering_params_ip_t *ps_ip; @@ -796,6 +812,7 @@ static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, break; } +#endif case IHEVCD_CXA_CMD_CTL_SET_NUM_CORES: { ihevcd_cxa_ctl_set_num_cores_ip_t *ps_ip; @@ -822,15 +839,11 @@ static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, return IV_FAIL; } -#ifdef MULTICORE if((ps_ip->u4_num_cores < 1) || (ps_ip->u4_num_cores > MAX_NUM_CORES)) -#else - if(ps_ip->u4_num_cores != 1) -#endif - { - ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; - return IV_FAIL; - } + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + return 
IV_FAIL; + } break; } case IHEVCD_CXA_CMD_CTL_SET_PROCESSOR: @@ -969,6 +982,13 @@ void ihevcd_update_function_ptr(codec_t *ps_codec) ps_codec->apf_intra_pred_chroma[9] = (pf_intra_pred)ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr; ps_codec->apf_intra_pred_chroma[10] = (pf_intra_pred)ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr; + /* Init itrans res function array */ + ps_codec->apf_itrans_res[0] = (pf_itrans_res)ps_codec->s_func_selector.ihevc_itrans_res_4x4_ttype1_fptr; + ps_codec->apf_itrans_res[1] = (pf_itrans_res)ps_codec->s_func_selector.ihevc_itrans_res_4x4_fptr; + ps_codec->apf_itrans_res[2] = (pf_itrans_res)ps_codec->s_func_selector.ihevc_itrans_res_8x8_fptr; + ps_codec->apf_itrans_res[3] = (pf_itrans_res)ps_codec->s_func_selector.ihevc_itrans_res_16x16_fptr; + ps_codec->apf_itrans_res[4] = (pf_itrans_res)ps_codec->s_func_selector.ihevc_itrans_res_32x32_fptr; + /* Init itrans_recon function array */ ps_codec->apf_itrans_recon[0] = (pf_itrans_recon)ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr; ps_codec->apf_itrans_recon[1] = (pf_itrans_recon)ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr; @@ -978,6 +998,7 @@ void ihevcd_update_function_ptr(codec_t *ps_codec) ps_codec->apf_itrans_recon[5] = (pf_itrans_recon)ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr; ps_codec->apf_itrans_recon[6] = (pf_itrans_recon)ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr; ps_codec->apf_itrans_recon[7] = (pf_itrans_recon)ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr; + ps_codec->apf_itrans_recon[8] = (pf_itrans_recon)ps_codec->s_func_selector.ihevc_chroma_itrans_recon_32x32_fptr; /* Init recon function array */ ps_codec->apf_recon[0] = (pf_recon)ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr; @@ -988,11 +1009,14 @@ void ihevcd_update_function_ptr(codec_t *ps_codec) ps_codec->apf_recon[5] = (pf_recon)ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr; 
ps_codec->apf_recon[6] = (pf_recon)ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr; ps_codec->apf_recon[7] = (pf_recon)ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr; + ps_codec->apf_recon[8] = (pf_recon)ps_codec->s_func_selector.ihevc_chroma_recon_32x32_fptr; /* Init itrans_recon_dc function array */ ps_codec->apf_itrans_recon_dc[0] = (pf_itrans_recon_dc)ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr; ps_codec->apf_itrans_recon_dc[1] = (pf_itrans_recon_dc)ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr; + ps_codec->apf_itrans_res_dc = (pf_itrans_res_dc)ps_codec->s_func_selector.ihevcd_itrans_res_dc_fptr; + /* Init sao function array */ ps_codec->apf_sao_luma[0] = (pf_sao_luma)ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr; ps_codec->apf_sao_luma[1] = (pf_sao_luma)ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr; @@ -1216,6 +1240,7 @@ WORD32 ihevcd_allocate_static_bufs(iv_obj_t **pps_codec_obj, ps_codec->pf_aligned_alloc = pf_aligned_alloc; ps_codec->pf_aligned_free = pf_aligned_free; ps_codec->pv_mem_ctxt = pv_mem_ctxt; + ps_codec->i4_threads_active = ps_create_ip->u4_keep_threads_active; /* Request memory to hold thread handles for each processing thread */ size = MAX_PROCESS_THREADS * ithread_get_handle_size(); @@ -1230,52 +1255,52 @@ WORD32 ihevcd_allocate_static_bufs(iv_obj_t **pps_codec_obj, (UWORD8 *)pv_buf + (i * handle_size); } -#ifdef KEEP_THREADS_ACTIVE - /* Request memory to hold mutex (start/done) for each processing thread */ - size = 2 * MAX_PROCESS_THREADS * ithread_get_mutex_lock_size(); - pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size); - RETURN_IF((NULL == pv_buf), IV_FAIL); - memset(pv_buf, 0, size); - - for(i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_codec->i4_threads_active) { - WORD32 ret; - WORD32 mutex_size = ithread_get_mutex_lock_size(); - ps_codec->apv_proc_start_mutex[i] = - (UWORD8 *)pv_buf + (2 * i * mutex_size); - ps_codec->apv_proc_done_mutex[i] = - (UWORD8 
*)pv_buf + ((2 * i + 1) * mutex_size); + /* Request memory to hold mutex (start/done) for each processing thread */ + size = 2 * MAX_PROCESS_THREADS * ithread_get_mutex_lock_size(); + pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size); + RETURN_IF((NULL == pv_buf), IV_FAIL); + memset(pv_buf, 0, size); - ret = ithread_mutex_init(ps_codec->apv_proc_start_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + WORD32 ret; + WORD32 mutex_size = ithread_get_mutex_lock_size(); + ps_codec->apv_proc_start_mutex[i] = + (UWORD8 *)pv_buf + (2 * i * mutex_size); + ps_codec->apv_proc_done_mutex[i] = + (UWORD8 *)pv_buf + ((2 * i + 1) * mutex_size); - ret = ithread_mutex_init(ps_codec->apv_proc_done_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + ret = ithread_mutex_init(ps_codec->apv_proc_start_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + ret = ithread_mutex_init(ps_codec->apv_proc_done_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + } + + size = 2 * MAX_PROCESS_THREADS * ithread_get_cond_struct_size(); + pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size); + RETURN_IF((NULL == pv_buf), IV_FAIL); + memset(pv_buf, 0, size); + + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + WORD32 ret; + WORD32 cond_size = ithread_get_cond_struct_size(); + ps_codec->apv_proc_start_condition[i] = + (UWORD8 *)pv_buf + (2 * i * cond_size); + ps_codec->apv_proc_done_condition[i] = + (UWORD8 *)pv_buf + ((2 * i + 1) * cond_size); + + ret = ithread_cond_init(ps_codec->apv_proc_start_condition[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + ret = ithread_cond_init(ps_codec->apv_proc_done_condition[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + } } - size = 2 * MAX_PROCESS_THREADS * ithread_get_cond_struct_size(); - pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size); - RETURN_IF((NULL == pv_buf), 
IV_FAIL); - memset(pv_buf, 0, size); - - for(i = 0; i < MAX_PROCESS_THREADS; i++) - { - WORD32 ret; - WORD32 cond_size = ithread_get_cond_struct_size(); - ps_codec->apv_proc_start_condition[i] = - (UWORD8 *)pv_buf + (2 * i * cond_size); - ps_codec->apv_proc_done_condition[i] = - (UWORD8 *)pv_buf + ((2 * i + 1) * cond_size); - - ret = ithread_cond_init(ps_codec->apv_proc_start_condition[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - - ret = ithread_cond_init(ps_codec->apv_proc_done_condition[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - } - -#endif - /* Request memory for static bitstream buffer which holds bitstream after emulation prevention */ size = MIN_BITSBUF_SIZE; pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size + 16); //Alloc extra for parse optimization @@ -1391,19 +1416,31 @@ WORD32 ihevcd_allocate_static_bufs(iv_obj_t **pps_codec_obj, { WORD32 inter_pred_tmp_buf_size, ntaps_luma; +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD32 res_buf_size; +#endif WORD32 pic_pu_idx_map_size; /* Max inter pred size */ ntaps_luma = 8; - inter_pred_tmp_buf_size = sizeof(WORD16) * (MAX_CTB_SIZE + ntaps_luma) * MAX_CTB_SIZE; + // For yuv 4:4:4 chroma the inter pred buffer size for one CTB will be double of luma + inter_pred_tmp_buf_size = sizeof(WORD16) * (MAX_CTB_SIZE + ntaps_luma) * MAX_CTB_SIZE * 2; inter_pred_tmp_buf_size = ALIGN64(inter_pred_tmp_buf_size); +#ifdef ENABLE_MAIN_REXT_PROFILE + res_buf_size = sizeof(WORD16) * (MAX_TU_SIZE * MAX_TU_SIZE); + res_buf_size = ALIGN64(res_buf_size); +#endif + /* To hold pu_index w.r.t. 
frame level pu_t array for a CTB */ pic_pu_idx_map_size = sizeof(WORD32) * (18 * 18); pic_pu_idx_map_size = ALIGN64(pic_pu_idx_map_size); size = inter_pred_tmp_buf_size * 2; +#ifdef ENABLE_MAIN_REXT_PROFILE + size += (res_buf_size * 4); +#endif size += pic_pu_idx_map_size; size *= MAX_PROCESS_THREADS; @@ -1419,6 +1456,17 @@ WORD32 ihevcd_allocate_static_bufs(iv_obj_t **pps_codec_obj, ps_codec->as_process[i].pi2_inter_pred_tmp_buf2 = (WORD16 *)pu1_buf; pu1_buf += inter_pred_tmp_buf_size; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_codec->as_process[i].pi2_res_luma_buf = (WORD16 *)pu1_buf; + pu1_buf += res_buf_size; + + ps_codec->as_process[i].pi2_res_chroma_buf = (WORD16 *)pu1_buf; + pu1_buf += res_buf_size; + + ps_codec->as_process[i].pi2_invscan_out_subtu = (WORD16 *)pu1_buf; + pu1_buf += (res_buf_size * 2); +#endif + /* Inverse transform intermediate and inverse scan output buffers reuse inter pred scratch buffers */ ps_codec->as_process[i].pi2_itrans_intrmd_buf = ps_codec->as_process[i].pi2_inter_pred_tmp_buf2; @@ -1464,6 +1512,37 @@ WORD32 ihevcd_allocate_static_bufs(iv_obj_t **pps_codec_obj, return (status); } +WORD32 ihevcd_join_threads(codec_t *ps_codec) +{ + if(ps_codec->i4_threads_active) + { + int i; + /* Wait for threads */ + ps_codec->i4_break_threads = 1; + + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + WORD32 ret; + if(ps_codec->ai4_process_thread_created[i]) + { + ret = ithread_mutex_lock(ps_codec->apv_proc_start_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + ps_codec->ai4_process_start[i] = 1; + ret = ithread_cond_signal(ps_codec->apv_proc_start_condition[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + ret = ithread_mutex_unlock(ps_codec->apv_proc_start_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + ithread_join(ps_codec->apv_process_thread_handle[i], NULL); + + ps_codec->ai4_process_thread_created[i] = 0; + } + } + } + return IV_SUCCESS; +} /** 
******************************************************************************* * @@ -1494,43 +1573,29 @@ WORD32 ihevcd_free_static_bufs(iv_obj_t *ps_codec_obj) pf_aligned_free = ps_codec->pf_aligned_free; pv_mem_ctxt = ps_codec->pv_mem_ctxt; -#ifdef KEEP_THREADS_ACTIVE - /* Wait for threads */ - ps_codec->i4_break_threads = 1; - for(int i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_codec->i4_threads_active) { - WORD32 ret; - if(ps_codec->ai4_process_thread_created[i]) + /* Wait for threads */ + ihevcd_join_threads(ps_codec); + + for(int i = 0; i < MAX_PROCESS_THREADS; i++) { - ret = ithread_mutex_lock(ps_codec->apv_proc_start_mutex[i]); + WORD32 ret; + ret = ithread_cond_destroy(ps_codec->apv_proc_start_condition[i]); RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - ps_codec->ai4_process_start[i] = 1; - ret = ithread_cond_signal(ps_codec->apv_proc_start_condition[i]); + ret = ithread_cond_destroy(ps_codec->apv_proc_done_condition[i]); RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - ret = ithread_mutex_unlock(ps_codec->apv_proc_start_mutex[i]); + ret = ithread_mutex_destroy(ps_codec->apv_proc_start_mutex[i]); RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - ithread_join(ps_codec->apv_process_thread_handle[i], NULL); - - ps_codec->ai4_process_thread_created[i] = 0; + ret = ithread_mutex_destroy(ps_codec->apv_proc_done_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); } - ret = ithread_cond_destroy(ps_codec->apv_proc_start_condition[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - - ret = ithread_cond_destroy(ps_codec->apv_proc_done_condition[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - - ret = ithread_mutex_destroy(ps_codec->apv_proc_start_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - - ret = ithread_mutex_destroy(ps_codec->apv_proc_done_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + ALIGNED_FREE(ps_codec, 
ps_codec->apv_proc_start_mutex[0]); + ALIGNED_FREE(ps_codec, ps_codec->apv_proc_start_condition[0]); } - ALIGNED_FREE(ps_codec, ps_codec->apv_proc_start_mutex[0]); - ALIGNED_FREE(ps_codec, ps_codec->apv_proc_start_condition[0]); -#endif ALIGNED_FREE(ps_codec, ps_codec->apv_process_thread_handle[0]); ALIGNED_FREE(ps_codec, ps_codec->pu1_bitsbuf_static); @@ -1751,41 +1816,62 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) memset(ps_codec->as_process[0].pu4_pic_pu_idx_top, 0, sizeof(UWORD32) * (wd / 4 + 1)); { + sps_t *ps_sps = (ps_codec->s_parse.ps_sps_base + ps_codec->i4_sps_id); + /* To hold SAO left buffer for luma */ size = sizeof(UWORD8) * (MAX(ht, wd)); /* To hold SAO left buffer for chroma */ - size += sizeof(UWORD8) * (MAX(ht, wd)); + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + size += sizeof(UWORD8) * (MAX(ht, wd)); + } /* To hold SAO top buffer for luma */ size += sizeof(UWORD8) * wd; /* To hold SAO top buffer for chroma */ - size += sizeof(UWORD8) * wd; + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + size += sizeof(UWORD8) * wd; + } /* To hold SAO top left luma pixel value for last output ctb in a row*/ size += sizeof(UWORD8) * max_ctb_rows; /* To hold SAO top left chroma pixel value last output ctb in a row*/ - size += sizeof(UWORD8) * max_ctb_rows * 2; + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + size += sizeof(UWORD8) * max_ctb_rows * 2; + } /* To hold SAO top left pixel luma for current ctb - column array*/ size += sizeof(UWORD8) * max_ctb_rows; /* To hold SAO top left pixel chroma for current ctb-column array*/ - size += sizeof(UWORD8) * max_ctb_rows * 2; + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + size += sizeof(UWORD8) * max_ctb_rows * 2; + } /* To hold SAO top right pixel luma pixel value last output ctb in a row*/ size += sizeof(UWORD8) * max_ctb_cols; /* To hold SAO top right pixel chroma pixel value last output ctb in a row*/ - size += 
sizeof(UWORD8) * max_ctb_cols * 2; + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + size += sizeof(UWORD8) * max_ctb_cols * 2; + } /*To hold SAO botton bottom left pixels for luma*/ size += sizeof(UWORD8) * max_ctb_rows; - /*To hold SAO botton bottom left pixels for luma*/ - size += sizeof(UWORD8) * max_ctb_rows * 2; + /*To hold SAO botton bottom left pixels for chroma*/ + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + size += sizeof(UWORD8) * max_ctb_rows * 2; + } + size = ALIGN64(size); pu1_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size); @@ -1799,12 +1885,16 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_left_luma = (UWORD8 *)pu1_buf; pu1_buf += MAX(ht, wd); - for(i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { - ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_left_chroma = (UWORD8 *)pu1_buf; + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_left_chroma = (UWORD8 *)pu1_buf; + } + ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_left_chroma = (UWORD8 *)pu1_buf; + pu1_buf += MAX(ht, wd); } - ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_left_chroma = (UWORD8 *)pu1_buf; - pu1_buf += MAX(ht, wd); + for(i = 0; i < MAX_PROCESS_THREADS; i++) { ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_luma = (UWORD8 *)pu1_buf; @@ -1812,12 +1902,16 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_luma = (UWORD8 *)pu1_buf; pu1_buf += wd; - for(i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { - ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_chroma = (UWORD8 *)pu1_buf; + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_chroma = (UWORD8 *)pu1_buf; + } + ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_chroma = (UWORD8 *)pu1_buf; + pu1_buf += 
wd; } - ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_chroma = (UWORD8 *)pu1_buf; - pu1_buf += wd; + for(i = 0; i < MAX_PROCESS_THREADS; i++) { ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_luma_top_left_ctb = (UWORD8 *)pu1_buf; @@ -1825,12 +1919,15 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_luma_top_left_ctb = (UWORD8 *)pu1_buf; pu1_buf += ht / MIN_CTB_SIZE; - for(i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { - ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_chroma_top_left_ctb = (UWORD8 *)pu1_buf; + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_chroma_top_left_ctb = (UWORD8 *)pu1_buf; + } + ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_chroma_top_left_ctb = (UWORD8 *)pu1_buf; + pu1_buf += (ht / MIN_CTB_SIZE) * 2; } - ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_chroma_top_left_ctb = (UWORD8 *)pu1_buf; - pu1_buf += (ht / MIN_CTB_SIZE) * 2; for(i = 0; i < MAX_PROCESS_THREADS; i++) { @@ -1839,27 +1936,32 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_luma_curr_ctb = (UWORD8 *)pu1_buf; pu1_buf += ht / MIN_CTB_SIZE; - for(i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { - ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_chroma_curr_ctb = (UWORD8 *)pu1_buf; + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_chroma_curr_ctb = (UWORD8 *)pu1_buf; + } + ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_chroma_curr_ctb = (UWORD8 *)pu1_buf; + pu1_buf += (ht / MIN_CTB_SIZE) * 2; } - ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_chroma_curr_ctb = (UWORD8 *)pu1_buf; - pu1_buf += (ht / MIN_CTB_SIZE) * 2; for(i = 0; i < MAX_PROCESS_THREADS; i++) { ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_luma_top_right = (UWORD8 *)pu1_buf; } 
ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_luma_top_right = (UWORD8 *)pu1_buf; - pu1_buf += wd / MIN_CTB_SIZE; - for(i = 0; i < MAX_PROCESS_THREADS; i++) - { - ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_chroma_top_right = (UWORD8 *)pu1_buf; - } - ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_chroma_top_right = (UWORD8 *)pu1_buf; - pu1_buf += (wd / MIN_CTB_SIZE) * 2; + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_chroma_top_right = (UWORD8 *)pu1_buf; + } + ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_chroma_top_right = (UWORD8 *)pu1_buf; + pu1_buf += (wd / MIN_CTB_SIZE) * 2; + } /*Per CTB, Store 1 value for luma , 2 values for chroma*/ for(i = 0; i < MAX_PROCESS_THREADS; i++) @@ -1867,16 +1969,17 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_luma_bot_left = (UWORD8 *)pu1_buf; } ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_luma_bot_left = (UWORD8 *)pu1_buf; - pu1_buf += (ht / MIN_CTB_SIZE); - for(i = 0; i < MAX_PROCESS_THREADS; i++) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { - ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_chroma_bot_left = (UWORD8 *)pu1_buf; + for(i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->as_process[i].s_sao_ctxt.pu1_sao_src_top_left_chroma_bot_left = (UWORD8 *)pu1_buf; + } + ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_chroma_bot_left = (UWORD8 *)pu1_buf; + pu1_buf += (ht / MIN_CTB_SIZE) * 2; } - ps_codec->s_parse.s_sao_ctxt.pu1_sao_src_top_left_chroma_bot_left = (UWORD8 *)pu1_buf; - - pu1_buf += (ht / MIN_CTB_SIZE) * 2; } @@ -2015,20 +2118,38 @@ WORD32 ihevcd_allocate_dynamic_bufs(codec_t *ps_codec) ps_codec->as_process[i].s_sao_ctxt.ps_pic_sao = ps_codec->s_parse.ps_pic_sao; } - /* Only if width * height * 3 / 2 is greater than MIN_BITSBUF_SIZE, + /* Only if (frame size * 
compression factor) is greater than MIN_BITSBUF_SIZE, then allocate dynamic bistream buffer */ ps_codec->pu1_bitsbuf_dynamic = NULL; - size = wd * ht; - if(size > MIN_BITSBUF_SIZE) { - pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size + 16); //Alloc extra for parse optimization - RETURN_IF((NULL == pv_buf), IV_FAIL); - memset(pv_buf, 0, size + 16); - ps_codec->pu1_bitsbuf_dynamic = pv_buf; - ps_codec->u4_bitsbuf_size_dynamic = size; + sps_t *ps_sps = (ps_codec->s_parse.ps_sps_base + ps_codec->i4_sps_id); + + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + size = wd * ht * 3; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + size = wd * ht * 2; + } + else + { + size = wd * ht; + } + + if(size > MIN_BITSBUF_SIZE) + { + //Alloc extra for parse optimization + pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size + 16); + RETURN_IF((NULL == pv_buf), IV_FAIL); + + memset(pv_buf, 0, size + 16); + ps_codec->pu1_bitsbuf_dynamic = pv_buf; + ps_codec->u4_bitsbuf_size_dynamic = size; + } } - size = ihevcd_get_tu_data_size(wd * ht); + size = ihevcd_get_tu_data_size(ps_codec, wd * ht); pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size); RETURN_IF((NULL == pv_buf), IV_FAIL); memset(pv_buf, 0, size); @@ -2249,6 +2370,10 @@ WORD32 ihevcd_create(iv_obj_t *ps_codec_obj, return IV_FAIL; } ps_codec = (codec_t *)ps_codec_obj->pv_codec_handle; + if (ps_create_ip->u4_enable_yuv_formats == 0) { + ps_create_ip->u4_enable_yuv_formats = 1 << CHROMA_FMT_IDC_YUV420; + } + ps_codec->u4_enable_yuv_formats = ps_create_ip->u4_enable_yuv_formats; ret = ihevcd_init(ps_codec); TRACE_INIT(NULL); @@ -2453,6 +2578,7 @@ WORD32 ihevcd_set_flush_mode(iv_obj_t *ps_codec_obj, ivd_ctl_flush_op_t *ps_ctl_op = (ivd_ctl_flush_op_t *)pv_api_op; UNUSED(pv_api_ip); ps_codec = (codec_t *)(ps_codec_obj->pv_codec_handle); + ihevcd_join_threads(ps_codec); /* Signal flush frame control call */ ps_codec->i4_flush_mode = 1; @@ -2517,15 +2643,15 @@ WORD32 
ihevcd_get_status(iv_obj_t *ps_codec_obj, ps_ctl_op->u4_min_num_in_bufs = MIN_IN_BUFS; if(ps_codec->e_chroma_fmt == IV_YUV_420P) ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420; - else if(ps_codec->e_chroma_fmt == IV_YUV_422ILE) - ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE; - else if(ps_codec->e_chroma_fmt == IV_RGB_565) - ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565; - else if(ps_codec->e_chroma_fmt == IV_RGBA_8888) - ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGBA8888; + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_444; + else if(ps_codec->e_chroma_fmt == IV_YUV_422P) + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422; else if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV) || (ps_codec->e_chroma_fmt == IV_YUV_420SP_VU)) ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420SP; + else if(ps_codec->e_chroma_fmt == IV_GRAY) + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_GRAY; ps_ctl_op->u4_num_disp_bufs = 1; @@ -2590,23 +2716,11 @@ WORD32 ihevcd_get_status(iv_obj_t *ps_codec_obj, ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht) >> 2; ps_ctl_op->u4_min_out_buf_size[2] = (wd * ht) >> 2; } - else if(ps_codec->e_chroma_fmt == IV_YUV_422ILE) + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) { - ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht) * 2; - ps_ctl_op->u4_min_out_buf_size[1] = - ps_ctl_op->u4_min_out_buf_size[2] = 0; - } - else if(ps_codec->e_chroma_fmt == IV_RGB_565) - { - ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht) * 2; - ps_ctl_op->u4_min_out_buf_size[1] = - ps_ctl_op->u4_min_out_buf_size[2] = 0; - } - else if(ps_codec->e_chroma_fmt == IV_RGBA_8888) - { - ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht) * 4; - ps_ctl_op->u4_min_out_buf_size[1] = - ps_ctl_op->u4_min_out_buf_size[2] = 0; + ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[2] = (wd * ht); } else if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV) || 
(ps_codec->e_chroma_fmt == IV_YUV_420SP_VU)) @@ -2615,6 +2729,18 @@ WORD32 ihevcd_get_status(iv_obj_t *ps_codec_obj, ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht) >> 1; ps_ctl_op->u4_min_out_buf_size[2] = 0; } + else if(ps_codec->e_chroma_fmt == IV_GRAY) + { + ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[1] = 0; + ps_ctl_op->u4_min_out_buf_size[2] = 0; + } + else if(ps_codec->e_chroma_fmt == IV_YUV_422P) + { + ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht) >> 1; + ps_ctl_op->u4_min_out_buf_size[2] = (wd * ht) >> 1; + } ps_ctl_op->u4_pic_ht = ht; ps_ctl_op->u4_pic_wd = wd; ps_ctl_op->u4_frame_rate = 30000; @@ -2677,15 +2803,15 @@ WORD32 ihevcd_get_buf_info(iv_obj_t *ps_codec_obj, ps_ctl_op->u4_min_num_in_bufs = MIN_IN_BUFS; if(ps_codec->e_chroma_fmt == IV_YUV_420P) ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420; - else if(ps_codec->e_chroma_fmt == IV_YUV_422ILE) - ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE; - else if(ps_codec->e_chroma_fmt == IV_RGB_565) - ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565; - else if(ps_codec->e_chroma_fmt == IV_RGBA_8888) - ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGBA8888; + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_444; + else if(ps_codec->e_chroma_fmt == IV_YUV_422P) + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422; else if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV) || (ps_codec->e_chroma_fmt == IV_YUV_420SP_VU)) ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420SP; + else if(ps_codec->e_chroma_fmt == IV_GRAY) + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_GRAY; ps_ctl_op->u4_num_disp_bufs = 1; @@ -2694,7 +2820,25 @@ WORD32 ihevcd_get_buf_info(iv_obj_t *ps_codec_obj, wd = ALIGN64(ps_codec->i4_wd); ht = ALIGN64(ps_codec->i4_ht); - ps_ctl_op->u4_min_in_buf_size[i] = MAX((wd * ht), MIN_BITSBUF_SIZE); + if(ps_codec->i4_sps_done) + { + sps_t *ps_sps = 
(ps_codec->s_parse.ps_sps_base + ps_codec->i4_sps_id); + /* Pick exactly one input buffer size by chroma format: 3x luma area for 4:4:4, 2x for 4:2:2, 1x otherwise */ + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + ps_ctl_op->u4_min_in_buf_size[i] = MAX((wd * ht * 3), MIN_BITSBUF_SIZE); + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + ps_ctl_op->u4_min_in_buf_size[i] = MAX((wd * ht * 2), MIN_BITSBUF_SIZE); + else + ps_ctl_op->u4_min_in_buf_size[i] = MAX((wd * ht), MIN_BITSBUF_SIZE); + } + else + { +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_ctl_op->u4_min_in_buf_size[i] = MAX((wd * ht * 3), MIN_BITSBUF_SIZE); +#else + ps_ctl_op->u4_min_in_buf_size[i] = MAX((wd * ht), MIN_BITSBUF_SIZE); +#endif + } } wd = 0; @@ -2760,23 +2904,11 @@ WORD32 ihevcd_get_buf_info(iv_obj_t *ps_codec_obj, ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht) >> 2; ps_ctl_op->u4_min_out_buf_size[2] = (wd * ht) >> 2; } - else if(ps_codec->e_chroma_fmt == IV_YUV_422ILE) + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) { - ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht) * 2; - ps_ctl_op->u4_min_out_buf_size[1] = - ps_ctl_op->u4_min_out_buf_size[2] = 0; - } - else if(ps_codec->e_chroma_fmt == IV_RGB_565) - { - ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht) * 2; - ps_ctl_op->u4_min_out_buf_size[1] = - ps_ctl_op->u4_min_out_buf_size[2] = 0; - } - else if(ps_codec->e_chroma_fmt == IV_RGBA_8888) - { - ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht) * 4; - ps_ctl_op->u4_min_out_buf_size[1] = - ps_ctl_op->u4_min_out_buf_size[2] = 0; + ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[2] = (wd * ht); } else if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV) || (ps_codec->e_chroma_fmt == IV_YUV_420SP_VU)) @@ -2785,6 +2917,18 @@ WORD32 ihevcd_get_buf_info(iv_obj_t *ps_codec_obj, ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht) >> 1; ps_ctl_op->u4_min_out_buf_size[2] = 0; } + else if(ps_codec->e_chroma_fmt == IV_GRAY) + { + ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[1] = +
ps_ctl_op->u4_min_out_buf_size[2] = 0; + } + else if(ps_codec->e_chroma_fmt == IV_YUV_422P) + { + ps_ctl_op->u4_min_out_buf_size[0] = (wd * ht); + ps_ctl_op->u4_min_out_buf_size[1] = (wd * ht) >> 1; + ps_ctl_op->u4_min_out_buf_size[2] = (wd * ht) >> 1; + } ps_codec->i4_num_disp_bufs = ps_ctl_op->u4_num_disp_bufs; return IV_SUCCESS; @@ -2943,6 +3087,8 @@ WORD32 ihevcd_reset(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) if(ps_codec != NULL) { DEBUG("\nReset called \n"); + ihevcd_join_threads(ps_codec); + ihevcd_init(ps_codec); } else @@ -3175,6 +3321,24 @@ WORD32 ihevcd_get_frame_dimensions(iv_obj_t *ps_codec_obj, ps_op->u4_buffer_wd[1] <<= 1; ps_op->u4_x_offset[1] <<= 1; } + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) + { + ps_op->u4_disp_wd[1] = ps_op->u4_disp_wd[2] = ps_op->u4_disp_wd[0]; + ps_op->u4_disp_ht[1] = ps_op->u4_disp_ht[2] = ps_op->u4_disp_ht[0]; + ps_op->u4_buffer_wd[1] = ps_op->u4_buffer_wd[2] = ps_op->u4_buffer_wd[0]; + ps_op->u4_buffer_ht[1] = ps_op->u4_buffer_ht[2] = ps_op->u4_buffer_ht[0]; + ps_op->u4_x_offset[1] = ps_op->u4_x_offset[2] = ps_op->u4_x_offset[0]; + ps_op->u4_y_offset[1] = ps_op->u4_y_offset[2] = ps_op->u4_y_offset[0]; + } + else if(ps_codec->e_chroma_fmt == IV_GRAY) + { + ps_op->u4_disp_wd[1] = ps_op->u4_disp_wd[2] = 0; + ps_op->u4_disp_ht[1] = ps_op->u4_disp_ht[2] = 0; + ps_op->u4_buffer_wd[1] = ps_op->u4_buffer_wd[2] = 0; + ps_op->u4_buffer_ht[1] = ps_op->u4_buffer_ht[2] = 0; + ps_op->u4_x_offset[1] = ps_op->u4_x_offset[2] = 0; + ps_op->u4_y_offset[1] = ps_op->u4_y_offset[2] = 0; + } return IV_SUCCESS; @@ -3321,6 +3485,7 @@ WORD32 ihevcd_get_vui_params(iv_obj_t *ps_codec_obj, return IV_SUCCESS; } +#ifndef DISABLE_SEI /** ******************************************************************************* * @@ -3396,6 +3561,7 @@ WORD32 ihevcd_get_sei_mastering_params(iv_obj_t *ps_codec_obj, return IV_SUCCESS; } +#endif /** ******************************************************************************* @@ -3504,11 +3670,7 
@@ WORD32 ihevcd_set_num_cores(iv_obj_t *ps_codec_obj, ps_ip = (ihevcd_cxa_ctl_set_num_cores_ip_t *)pv_api_ip; ps_op = (ihevcd_cxa_ctl_set_num_cores_op_t *)pv_api_op; -#ifdef MULTICORE ps_codec->i4_num_cores = ps_ip->u4_num_cores; -#else - ps_codec->i4_num_cores = 1; -#endif ps_op->u4_error_code = 0; return IV_SUCCESS; } @@ -3634,8 +3796,17 @@ WORD32 ihevcd_ctl(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) (void *)pv_api_op); break; case IHEVCD_CXA_CMD_CTL_GET_SEI_MASTERING_PARAMS: +#ifndef DISABLE_SEI ret = ihevcd_get_sei_mastering_params(ps_codec_obj, (void *)pv_api_ip, (void *)pv_api_op); +#else + { + ihevcd_cxa_ctl_get_sei_mastering_params_op_t *ps_op = + (ihevcd_cxa_ctl_get_sei_mastering_params_op_t *)pv_api_op; + ps_op->u4_error_code = IHEVCD_SEI_MASTERING_PARAMS_NOT_FOUND; + return IV_FAIL; + } +#endif break; case IHEVCD_CXA_CMD_CTL_SET_PROCESSOR: ret = ihevcd_set_processor(ps_codec_obj, (void *)pv_api_ip, diff --git a/decoder/ihevcd_cabac.c b/decoder/ihevcd_cabac.c index 7e24f02..84cee69 100644 --- a/decoder/ihevcd_cabac.c +++ b/decoder/ihevcd_cabac.c @@ -133,7 +133,12 @@ IHEVCD_ERROR_T ihevcd_cabac_init(cab_ctxt_t *ps_cabac, bitstrm_t *ps_bitstrm, WORD32 qp, WORD32 cabac_init_idc, - const UWORD8 *pu1_init_ctxt) + const UWORD8 *pu1_init_ctxt +#ifdef ENABLE_MAIN_REXT_PROFILE + , + const WORD32 *pi4_rice_stat_coeff +#endif + ) { /* Sanity checks */ ASSERT(ps_cabac != NULL); @@ -162,6 +167,16 @@ IHEVCD_ERROR_T ihevcd_cabac_init(cab_ctxt_t *ps_cabac, memcpy(ps_cabac->au1_ctxt_models, pu1_init_ctxt, IHEVC_CAB_CTXT_END); + +#ifdef ENABLE_MAIN_REXT_PROFILE + /* golomb rice statistics */ + if(pi4_rice_stat_coeff) + { + memcpy(ps_cabac->ai4_rice_stat_coeff, pi4_rice_stat_coeff, + sizeof(ps_cabac->ai4_rice_stat_coeff)); + } +#endif + DEBUG_RANGE_OFST("init", ps_cabac->u4_range, ps_cabac->u4_ofst); /* diff --git a/decoder/ihevcd_cabac.h b/decoder/ihevcd_cabac.h index 2c4a543..4fec647 100644 --- a/decoder/ihevcd_cabac.h +++ b/decoder/ihevcd_cabac.h @@ 
-214,6 +214,10 @@ IHEVCD_ERROR_T ihevcd_cabac_init WORD32 slice_qp, WORD32 cabac_init_idc, const UWORD8 *pu1_init_ctxt +#ifdef ENABLE_MAIN_REXT_PROFILE + , + const WORD32 *pi4_rice_stat_coeff +#endif ); diff --git a/decoder/ihevcd_common_tables.c b/decoder/ihevcd_common_tables.c index d94a33b..45a6dd6 100644 --- a/decoder/ihevcd_common_tables.c +++ b/decoder/ihevcd_common_tables.c @@ -38,7 +38,7 @@ #include "ihevcd_common_tables.h" #include "ihevc_defs.h" -const WORD16 gai2_ihevcd_chroma_qp[] = +const WORD16 gai2_ihevcd_chroma_qp_420[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, @@ -47,3 +47,12 @@ const WORD16 gai2_ihevcd_chroma_qp[] = const UWORD8 gau1_intra_pred_chroma_modes[] = { INTRA_PLANAR, INTRA_ANGULAR(26), INTRA_ANGULAR(10), INTRA_DC }; +const UWORD8 gau1_intra_pred_chroma_modes_422[] = + { 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20, + 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31}; + +const WORD16 gai2_ihevcd_chroma_qp_clip[] = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51 }; diff --git a/decoder/ihevcd_common_tables.h b/decoder/ihevcd_common_tables.h index 217fb1f..ecc5cc1 100644 --- a/decoder/ihevcd_common_tables.h +++ b/decoder/ihevcd_common_tables.h @@ -34,9 +34,12 @@ #ifndef _IHEVCD_COMMON_TABLES_H_ #define _IHEVCD_COMMON_TABLES_H_ -extern const WORD16 gai2_ihevcd_chroma_qp[]; +extern const WORD16 gai2_ihevcd_chroma_qp_420[]; extern const UWORD8 gau1_intra_pred_chroma_modes[]; +extern const UWORD8 gau1_intra_pred_chroma_modes_422[]; + +extern const WORD16 gai2_ihevcd_chroma_qp_clip[]; #endif /*_IHEVCD_COMMON_TABLES_H_*/ diff --git a/decoder/ihevcd_cxa.h b/decoder/ihevcd_cxa.h index 7baf480..4d7df58 
100644 --- a/decoder/ihevcd_cxa.h +++ b/decoder/ihevcd_cxa.h @@ -71,8 +71,8 @@ IV_API_CALL_STATUS_T ihevcd_cxa_api_function(iv_obj_t *ps_handle, /*****************************************************************************/ /* Enums */ /*****************************************************************************/ -/* Codec Error codes for HEVC Decoder */ +/* Codec Error codes for HEVC Decoder */ typedef enum { /** @@ -176,6 +176,31 @@ typedef struct { * enable_frm_info */ UWORD32 u4_enable_frame_info; + + /** + * enable_threads + */ + UWORD32 u4_keep_threads_active; + + /** + * Bitmask specifying the set of supported YUV output formats. + * + * Use a bitwise OR to enable multiple formats. + * The bit positions are defined by the CHROMA_FMT_IDC_* constants. + * + * Bit positions and corresponding values: + * (1 << CHROMA_FMT_IDC_MONOCHROME): 1 (YUV 4:0:0) + * (1 << CHROMA_FMT_IDC_YUV420): 2 (YUV 4:2:0) + * (1 << CHROMA_FMT_IDC_YUV422): 4 (YUV 4:2:2) + * (1 << CHROMA_FMT_IDC_YUV444): 8 (YUV 4:4:4) + * (1 << CHROMA_FMT_IDC_YUV444_PLANES): 16 (YUV 4:4:4) + * + * Example (Enable 4:0:0 and 4:2:0): + * (1 << CHROMA_FMT_IDC_MONOCHROME) | (1 << CHROMA_FMT_IDC_YUV420) + * + * NOTE: If this field is set to 0, YUV 4:2:0 will be enabled by default. 
+ */ + UWORD32 u4_enable_yuv_formats; }ihevcd_cxa_create_ip_t; diff --git a/decoder/ihevcd_deblk.c b/decoder/ihevcd_deblk.c index a00fadb..3a8259b 100644 --- a/decoder/ihevcd_deblk.c +++ b/decoder/ihevcd_deblk.c @@ -496,7 +496,7 @@ void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk, /* Chroma Veritcal Edge */ - if(0 == i4_is_last_ctb_x) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 == i4_is_last_ctb_x) { /* Top CTB's slice header */ @@ -643,7 +643,7 @@ void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk, /* Chroma Horizontal Edge */ - if(0 == i4_is_last_ctb_y) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 == i4_is_last_ctb_y) { /* Left CTB's slice header */ diff --git a/decoder/ihevcd_decode.c b/decoder/ihevcd_decode.c index d9ee1c0..42a49ac 100644 --- a/decoder/ihevcd_decode.c +++ b/decoder/ihevcd_decode.c @@ -253,6 +253,7 @@ static void ihevcd_fill_outargs(codec_t *ps_codec, if(ps_codec->ps_disp_buf) { pic_buf_t *ps_disp_buf = ps_codec->ps_disp_buf; +#ifndef DISABLE_SEI sei_params_t *ps_sei = &ps_disp_buf->s_sei_params; if(ps_sei->i1_sei_parameters_present_flag && @@ -277,6 +278,7 @@ static void ihevcd_fill_outargs(codec_t *ps_codec, break; } } +#endif ps_dec_op->i4_display_index = ps_disp_buf->i4_abs_poc; ps_dec_op->u4_output_present = 1; ps_dec_op->u4_ts = ps_disp_buf->u4_ts; @@ -354,6 +356,39 @@ static void ihevcd_fill_outargs(codec_t *ps_codec, ps_dec_op->s_disp_frm_buf.u4_v_ht = ps_dec_op->s_disp_frm_buf.u4_y_ht / 2; } + else if(IV_YUV_444P == ps_codec->e_chroma_fmt) + { + ps_dec_op->s_disp_frm_buf.u4_u_strd = ps_dec_op->s_disp_frm_buf.u4_y_strd; + ps_dec_op->s_disp_frm_buf.u4_v_strd = ps_dec_op->s_disp_frm_buf.u4_y_strd; + ps_dec_op->s_disp_frm_buf.u4_u_wd = ps_dec_op->s_disp_frm_buf.u4_y_wd; + ps_dec_op->s_disp_frm_buf.u4_v_wd = ps_dec_op->s_disp_frm_buf.u4_y_wd; + ps_dec_op->s_disp_frm_buf.u4_u_ht = ps_dec_op->s_disp_frm_buf.u4_y_ht; + ps_dec_op->s_disp_frm_buf.u4_v_ht = ps_dec_op->s_disp_frm_buf.u4_y_ht; + } + else 
if(IV_GRAY == ps_codec->e_chroma_fmt) + { + ps_dec_op->s_disp_frm_buf.u4_u_strd = 0; + ps_dec_op->s_disp_frm_buf.u4_v_strd = 0; + ps_dec_op->s_disp_frm_buf.u4_u_wd = 0; + ps_dec_op->s_disp_frm_buf.u4_v_wd = 0; + ps_dec_op->s_disp_frm_buf.u4_u_ht = 0; + ps_dec_op->s_disp_frm_buf.u4_v_ht = 0; + } + else if(IV_YUV_422P == ps_codec->e_chroma_fmt) + { + ps_dec_op->s_disp_frm_buf.u4_u_strd = + ps_dec_op->s_disp_frm_buf.u4_y_strd / 2; + ps_dec_op->s_disp_frm_buf.u4_v_strd = + ps_dec_op->s_disp_frm_buf.u4_y_strd / 2; + ps_dec_op->s_disp_frm_buf.u4_u_wd = + ps_dec_op->s_disp_frm_buf.u4_y_wd / 2; + ps_dec_op->s_disp_frm_buf.u4_v_wd = + ps_dec_op->s_disp_frm_buf.u4_y_wd / 2; + ps_dec_op->s_disp_frm_buf.u4_u_ht = + ps_dec_op->s_disp_frm_buf.u4_y_ht; + ps_dec_op->s_disp_frm_buf.u4_v_ht = + ps_dec_op->s_disp_frm_buf.u4_y_ht; + } } else if(ps_codec->i4_flush_mode) @@ -983,22 +1018,25 @@ WORD32 ihevcd_decode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) { if(ps_codec->ai4_process_thread_created[i]) { -#ifdef KEEP_THREADS_ACTIVE - ret = ithread_mutex_lock(ps_codec->apv_proc_done_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - - while(!ps_codec->ai4_process_done[i]) + if(ps_codec->i4_threads_active) { - ithread_cond_wait(ps_codec->apv_proc_done_condition[i], - ps_codec->apv_proc_done_mutex[i]); + ret = ithread_mutex_lock(ps_codec->apv_proc_done_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + while(!ps_codec->ai4_process_done[i]) + { + ithread_cond_wait(ps_codec->apv_proc_done_condition[i], + ps_codec->apv_proc_done_mutex[i]); + } + ps_codec->ai4_process_done[i] = 0; + ret = ithread_mutex_unlock(ps_codec->apv_proc_done_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + } + else + { + ithread_join(ps_codec->apv_process_thread_handle[i], NULL); + ps_codec->ai4_process_thread_created[i] = 0; } - ps_codec->ai4_process_done[i] = 0; - ret = ithread_mutex_unlock(ps_codec->apv_proc_done_mutex[i]); - 
RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); -#else - ithread_join(ps_codec->apv_process_thread_handle[i], NULL); - ps_codec->ai4_process_thread_created[i] = 0; -#endif } } diff --git a/decoder/ihevcd_defs.h b/decoder/ihevcd_defs.h index 47fff65..21f81b6 100644 --- a/decoder/ihevcd_defs.h +++ b/decoder/ihevcd_defs.h @@ -196,7 +196,7 @@ m_scaling_mat_size = 6 * TRANS_SIZE_4 * TRANS_SIZE_4; \ m_scaling_mat_size += 6 * TRANS_SIZE_8 * TRANS_SIZE_8; \ m_scaling_mat_size += 6 * TRANS_SIZE_16 * TRANS_SIZE_16; \ - m_scaling_mat_size += 2 * TRANS_SIZE_32 * TRANS_SIZE_32; \ + m_scaling_mat_size += 6 * TRANS_SIZE_32 * TRANS_SIZE_32; \ } /** @@ -450,10 +450,10 @@ enum */ #define MIN_IN_BUFS 1 #define MIN_OUT_BUFS_420 3 -#define MIN_OUT_BUFS_422ILE 1 -#define MIN_OUT_BUFS_RGB565 1 -#define MIN_OUT_BUFS_RGBA8888 1 +#define MIN_OUT_BUFS_422 3 +#define MIN_OUT_BUFS_444 3 #define MIN_OUT_BUFS_420SP 2 +#define MIN_OUT_BUFS_GRAY 1 /** **************************************************************************** diff --git a/decoder/ihevcd_error.h b/decoder/ihevcd_error.h index 429d4cc..91e447a 100644 --- a/decoder/ihevcd_error.h +++ b/decoder/ihevcd_error.h @@ -128,5 +128,10 @@ typedef enum */ IHEVCD_SEI_MASTERING_PARAMS_NOT_FOUND, + /** + * Feature not supported + */ + IHEVCD_UNSUPPORTED_TOOL_SET, + }IHEVCD_ERROR_T; #endif /* _IHEVCD_ERROR_H_ */ diff --git a/decoder/ihevcd_fmt_conv.c b/decoder/ihevcd_fmt_conv.c index 4637fe8..2063703 100644 --- a/decoder/ihevcd_fmt_conv.c +++ b/decoder/ihevcd_fmt_conv.c @@ -67,270 +67,6 @@ /* SIMD variants of format conversion modules do not support width less than 32 */ #define MIN_FMT_CONV_SIMD_WIDTH 32 -/** -******************************************************************************* -* -* @brief Function used from copying a 420SP buffer -* -* @par Description -* Function used from copying a 420SP buffer -* -* @param[in] pu1_y_src -* Input Y pointer -* -* @param[in] pu1_uv_src -* Input UV pointer (UV is interleaved either in UV or 
VU format) -* -* @param[in] pu1_y_dst -* Output Y pointer -* -* @param[in] pu1_uv_dst -* Output UV pointer (UV is interleaved in the same format as that of input) -* -* @param[in] wd -* Width -* -* @param[in] ht -* Height -* -* @param[in] src_y_strd -* Input Y Stride -* -* @param[in] src_uv_strd -* Input UV stride -* -* @param[in] dst_y_strd -* Output Y stride -* -* @param[in] dst_uv_strd -* Output UV stride -* -* @returns None -* -* @remarks In case there is a need to perform partial frame copy then -* by passion appropriate source and destination pointers and appropriate -* values for wd and ht it can be done -* -******************************************************************************* -*/ -void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src, - UWORD8 *pu1_uv_src, - UWORD16 *pu2_rgb_dst, - WORD32 wd, - WORD32 ht, - WORD32 src_y_strd, - WORD32 src_uv_strd, - WORD32 dst_strd, - WORD32 is_u_first) -{ - - - WORD16 i2_r, i2_g, i2_b; - UWORD32 u4_r, u4_g, u4_b; - WORD16 i2_i, i2_j; - UWORD8 *pu1_y_src_nxt; - UWORD16 *pu2_rgb_dst_NextRow; - - UWORD8 *pu1_u_src, *pu1_v_src; - - if(is_u_first) - { - pu1_u_src = (UWORD8 *)pu1_uv_src; - pu1_v_src = (UWORD8 *)pu1_uv_src + 1; - } - else - { - pu1_u_src = (UWORD8 *)pu1_uv_src + 1; - pu1_v_src = (UWORD8 *)pu1_uv_src; - } - - pu1_y_src_nxt = pu1_y_src + src_y_strd; - pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd; - - for(i2_i = 0; i2_i < (ht >> 1); i2_i++) - { - for(i2_j = (wd >> 1); i2_j > 0; i2_j--) - { - i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); - i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13; - i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; - - pu1_u_src += 2; - pu1_v_src += 2; - /* pixel 0 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src + i2_b); - u4_b >>= 3; - /* G */ - u4_g = CLIP_U8(*pu1_y_src + i2_g); - u4_g >>= 2; - /* R */ - u4_r = CLIP_U8(*pu1_y_src + i2_r); - u4_r >>= 3; - - pu1_y_src++; - *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); - - /* pixel 1 */ - /* B */ - u4_b = 
CLIP_U8(*pu1_y_src + i2_b); - u4_b >>= 3; - /* G */ - u4_g = CLIP_U8(*pu1_y_src + i2_g); - u4_g >>= 2; - /* R */ - u4_r = CLIP_U8(*pu1_y_src + i2_r); - u4_r >>= 3; - - pu1_y_src++; - *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); - - /* pixel 2 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); - u4_b >>= 3; - /* G */ - u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); - u4_g >>= 2; - /* R */ - u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); - u4_r >>= 3; - - pu1_y_src_nxt++; - *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b); - - /* pixel 3 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); - u4_b >>= 3; - /* G */ - u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); - u4_g >>= 2; - /* R */ - u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); - u4_r >>= 3; - - pu1_y_src_nxt++; - *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b); - - } - - pu1_u_src = pu1_u_src + src_uv_strd - wd; - pu1_v_src = pu1_v_src + src_uv_strd - wd; - - pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; - pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; - - pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd; - pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd; - } - - -} - -void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src, - UWORD8 *pu1_uv_src, - UWORD32 *pu4_rgba_dst, - WORD32 wd, - WORD32 ht, - WORD32 src_y_strd, - WORD32 src_uv_strd, - WORD32 dst_strd, - WORD32 is_u_first) -{ - - - WORD16 i2_r, i2_g, i2_b; - UWORD32 u4_r, u4_g, u4_b; - WORD16 i2_i, i2_j; - UWORD8 *pu1_y_src_nxt; - UWORD32 *pu4_rgba_dst_NextRow; - - UWORD8 *pu1_u_src, *pu1_v_src; - - if(is_u_first) - { - pu1_u_src = (UWORD8 *)pu1_uv_src; - pu1_v_src = (UWORD8 *)pu1_uv_src + 1; - } - else - { - pu1_u_src = (UWORD8 *)pu1_uv_src + 1; - pu1_v_src = (UWORD8 *)pu1_uv_src; - } - - pu1_y_src_nxt = pu1_y_src + src_y_strd; - pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd; - - for(i2_i = 0; i2_i < (ht >> 1); i2_i++) - { - for(i2_j = (wd >> 1); i2_j > 0; i2_j--) - { - i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); - 
i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13; - i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; - - pu1_u_src += 2; - pu1_v_src += 2; - /* pixel 0 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src + i2_b); - /* G */ - u4_g = CLIP_U8(*pu1_y_src + i2_g); - /* R */ - u4_r = CLIP_U8(*pu1_y_src + i2_r); - - pu1_y_src++; - *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); - - /* pixel 1 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src + i2_b); - /* G */ - u4_g = CLIP_U8(*pu1_y_src + i2_g); - /* R */ - u4_r = CLIP_U8(*pu1_y_src + i2_r); - - pu1_y_src++; - *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); - - /* pixel 2 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); - /* G */ - u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); - /* R */ - u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); - - pu1_y_src_nxt++; - *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); - - /* pixel 3 */ - /* B */ - u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); - /* G */ - u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); - /* R */ - u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); - - pu1_y_src_nxt++; - *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); - - } - - pu1_u_src = pu1_u_src + src_uv_strd - wd; - pu1_v_src = pu1_v_src + src_uv_strd - wd; - - pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; - pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; - - pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd; - pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd; - } - - -} - /** ******************************************************************************* * @@ -430,6 +166,128 @@ void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src, } +/** +******************************************************************************* +* +* @brief Function used from copying a 400 buffer +* +* @par Description +* Function used from copying a 400 buffer +* +* @param[in] pu1_y_src +* Input Y pointer +* +* @param[in] pu1_y_dst +* Output Y pointer +* +* @param[in] wd +* 
Width +* +* @param[in] ht +* Height +* +* @param[in] src_y_strd +* Input Y Stride +* +* @param[in] dst_y_strd +* Output Y stride +* +* @returns None +* +* @remarks In case there is a need to perform partial frame copy then +* by passion appropriate source and destination pointers and appropriate +* values for wd and ht it can be done +* +******************************************************************************* +*/ + +void ihevcd_fmt_conv_luma_copy(UWORD8 *pu1_y_src, + UWORD8 *pu1_y_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 dst_y_strd) +{ + UWORD8 *pu1_src, *pu1_dst; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i; + + /* copy luma */ + pu1_src = (UWORD8 *)pu1_y_src; + pu1_dst = (UWORD8 *)pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for(i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + return; +} + +/** +******************************************************************************* +* +* @brief Function to convert a YUV 4:0:0 buffer to YUV 4:2:0 buffer +* +* @par Description +* This function handles the format conversion from a 4:0:0 input buffer to a +* 4:2:0 output buffer. It copies the Luma (Y) plane directly and synthesizes +* the Chroma (U and V) planes by initializing them to a neutral gray value (128). +* +* @param[in] pu1_y_src +*   Input Y pointer +* +* @param[out] pu1_y_dst_tmp +*   Output Y pointer +* +* @param[out] pu1_u_dst_tmp +*   Output Chroma U pointer +* +* @param[out] pu1_v_dst_tmp +*   Output Chroma V pointer +* +* @param[in] ps_codec +*   Pointer to the codec structure +* +* @param[in] num_rows +*   Number of rows (height) to process. +* +* @returns None +* +* @remarks The Chroma (U and V) planes are initialized to 128, which represents +* a neutral gray in 8-bit YUV, effectively 'zeroing out' the color information +* present in the 4:0:0 (grayscale) source. 
The Luma plane is copied using +* ihevcd_fmt_conv_400_to_400. +* +******************************************************************************* +*/ +void ihevcd_fmt_conv_400_to_420p(UWORD8 *pu1_y_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + ihevcd_fmt_conv_luma_copy(pu1_y_src, pu1_y_dst, wd, ht, src_y_strd, dst_y_strd); + for(int i = 0; i < ht / 2; i++) + { + memset(pu1_u_dst, 128, wd / 2); + memset(pu1_v_dst, 128, wd / 2); + pu1_u_dst += dst_uv_strd; + pu1_v_dst += dst_uv_strd; + } + return; +} /** ******************************************************************************* @@ -659,6 +517,178 @@ void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src, return; } +/** +******************************************************************************* +* +* @brief Function to convert a YUV 4:4:4 sp buffer to YUV 4:4:4 planar buffer +* +* @par Description +* This function handles the format conversion from a 4:4:4 semi planar input buffer +* to a 4:4:4 planar output buffer. 
+* +* @param[in] pu1_y_src +* Input Y pointer +* +* @param[in] pu1_uv_src +* Input UV pointer (UV is interleaved) +* +* @param[in] pu1_y_dst +* Output Y pointer +* +* @param[in] pu1_u_dst +* Output U pointer +* +* @param[in] pu1_v_dst +* Output V pointer +* +* @param[in] wd +* Width +* +* @param[in] ht +* Height +* +* @param[in] src_y_strd +* Input Y Stride +* +* @param[in] src_uv_strd +* Input UV stride +* +* @param[in] dst_y_strd +* Output Y stride +* +* @param[in] dst_uv_strd +* Output U or V stride +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ihevcd_fmt_conv_444sp_to_444p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + ihevcd_fmt_conv_luma_copy(pu1_y_src, pu1_y_dst, wd, ht, src_y_strd, dst_y_strd); + + /* de-interleave U and V and copy to destination */ + UWORD8 *pu1_u_src = (UWORD8*)pu1_uv_src; + UWORD8 *pu1_v_src = (UWORD8*)pu1_uv_src + 1; + + for(WORD32 i = 0; i < ht; i++) + { + for(WORD32 j = 0; j < wd; j++) + { + pu1_u_dst[j] = pu1_u_src[j * 2]; + pu1_v_dst[j] = pu1_v_src[j * 2]; + } + pu1_u_dst += dst_uv_strd; + pu1_v_dst += dst_uv_strd; + pu1_u_src += src_uv_strd; + pu1_v_src += src_uv_strd; + } + return; +} + +void ihevcd_fmt_conv_444sp_to_420p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + ihevcd_fmt_conv_luma_copy(pu1_y_src, pu1_y_dst, wd, ht, src_y_strd, dst_y_strd); + + for(WORD32 i = 0; i < ht; i += 2) + { + for(WORD32 j = 0; j < wd; j += 2) + { + pu1_u_dst[j / 2] = pu1_uv_src[j * 2]; + pu1_v_dst[j / 2] = pu1_uv_src[j * 2 + 1]; + } + pu1_u_dst += dst_uv_strd; + pu1_v_dst += dst_uv_strd; + pu1_uv_src += (src_uv_strd * 2); 
+ } + return; +} + +void ihevcd_fmt_conv_422sp_to_422p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + UWORD8 *pu1_u_src, *pu1_v_src; + WORD32 i, j; + + ihevcd_fmt_conv_luma_copy(pu1_y_src, pu1_y_dst, wd, ht, src_y_strd, dst_y_strd); + + pu1_u_src = (UWORD8 *)pu1_uv_src; + pu1_v_src = (UWORD8 *)pu1_uv_src + 1; + + for(i = 0; i < ht; i++) + { + for(j = 0; j < (wd >> 1); j++) + { + pu1_u_dst[j] = pu1_u_src[j * 2]; + pu1_v_dst[j] = pu1_v_src[j * 2]; + } + + pu1_u_dst += dst_uv_strd; + pu1_v_dst += dst_uv_strd; + pu1_u_src += src_uv_strd; + pu1_v_src += src_uv_strd; + } + return; +} + +void ihevcd_fmt_conv_422sp_to_420p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + ihevcd_fmt_conv_luma_copy(pu1_y_src, pu1_y_dst, wd, ht, src_y_strd, dst_y_strd); + + for(WORD32 i = 0; i < ht; i++) + { + for(WORD32 j = 0; j < wd; j += 2) + { + pu1_u_dst[j / 2] = pu1_uv_src[j * 2]; + pu1_v_dst[j / 2] = pu1_uv_src[j * 2 + 1]; + } + pu1_u_dst += dst_uv_strd; + pu1_v_dst += dst_uv_strd; + pu1_uv_src += src_uv_strd; + } + return; +} /** @@ -702,14 +732,15 @@ IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, UWORD8 *pu1_y_src, *pu1_uv_src; UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp; UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp; - UWORD16 *pu2_rgb_dst_tmp; - UWORD32 *pu4_rgb_dst_tmp; WORD32 is_u_first; UWORD8 *pu1_luma; UWORD8 *pu1_chroma; sps_t *ps_sps; WORD32 disable_luma_copy; WORD32 crop_unit_x, crop_unit_y; + WORD32 h_samp_factor, v_samp_factor; + WORD32 src_chroma_pixel_strd = 2; + WORD32 src_chroma_row_stride; if(0 == num_rows) return ret; @@ -718,6 +749,9 @@ IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, PROFILE_DISABLE_FMT_CONV(); ps_sps = 
ps_proc->ps_sps; + h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; + v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1; + crop_unit_x = 1; crop_unit_y = 1; @@ -726,19 +760,29 @@ IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, crop_unit_x = 2; crop_unit_y = 2; } + else if(CHROMA_FMT_IDC_YUV422 == ps_sps->i1_chroma_format_idc) + { + crop_unit_x = 2; + crop_unit_y = 1; + } ps_disp_pic = ps_codec->ps_disp_buf; pu1_luma = ps_disp_pic->pu1_luma; - pu1_chroma = ps_disp_pic->pu1_chroma; + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + pu1_chroma = ps_disp_pic->pu1_chroma; + } /* Take care of cropping */ pu1_luma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x; - /* Left offset is multiplied by 2 because buffer is UV interleaved */ - pu1_chroma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2; - - + src_chroma_row_stride = (ps_codec->i4_strd * src_chroma_pixel_strd / h_samp_factor); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + pu1_chroma += (ps_sps->i2_pic_crop_top_offset * src_chroma_row_stride) + + ps_sps->i2_pic_crop_left_offset * src_chroma_pixel_strd; + } is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 
1 : 0; /* In case of 420P output luma copy is disabled for shared mode */ @@ -752,7 +796,10 @@ IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, { pu1_y_src = pu1_luma + cur_row * ps_codec->i4_strd; - pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd; + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + pu1_uv_src = pu1_chroma + ((cur_row / v_samp_factor) * src_chroma_row_stride); + } /* In case of shared mode, with 420P output, get chroma destination */ if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt)) @@ -772,14 +819,27 @@ IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, } } } - pu2_rgb_dst_tmp = (UWORD16 *)pu1_y_dst; - pu2_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd; - pu4_rgb_dst_tmp = (UWORD32 *)pu1_y_dst; - pu4_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd; pu1_y_dst_tmp = pu1_y_dst + cur_row * ps_codec->i4_disp_strd; - pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd; - pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2; - pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2; + if(IV_YUV_444P == ps_codec->e_chroma_fmt) + { + pu1_u_dst_tmp = pu1_u_dst + cur_row * ps_codec->i4_disp_strd; + pu1_v_dst_tmp = pu1_v_dst + cur_row * ps_codec->i4_disp_strd; + } + else if(IV_YUV_422P == ps_codec->e_chroma_fmt) + { + pu1_u_dst_tmp = pu1_u_dst + cur_row * ps_codec->i4_disp_strd / 2; + pu1_v_dst_tmp = pu1_v_dst + cur_row * ps_codec->i4_disp_strd / 2; + } + else if(IV_YUV_420P == ps_codec->e_chroma_fmt) + { + pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2; + pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2; + } + else if(IV_YUV_420SP_UV == ps_codec->e_chroma_fmt + || IV_YUV_420SP_VU == ps_codec->e_chroma_fmt) + { + pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd; + } /* In case of multi threaded implementation, format conversion might be called * before reconstruction is completed. 
If the frame being converted/copied @@ -829,112 +889,119 @@ IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec, } } - if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt)) { - ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr; - if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) { - fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr; + ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr; + if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) + { + fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr; + } + else + { + fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp; + } + fmt_conv_fptr(pu1_y_src, pu1_uv_src, + pu1_y_dst_tmp, pu1_uv_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, ps_codec->i4_strd, + ps_codec->i4_disp_strd, ps_codec->i4_disp_strd); } - else + } + else if(IV_GRAY == ps_codec->e_chroma_fmt) + { + ihevcd_fmt_conv_luma_copy(pu1_y_src, + pu1_y_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, + ps_codec->i4_disp_strd); + } + else if(IV_YUV_444P == ps_codec->e_chroma_fmt) + { + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) { - fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp; + ps_codec->s_func_selector.ihevcd_fmt_conv_444sp_to_444p_fptr( + pu1_y_src, pu1_uv_src, + pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, src_chroma_row_stride, + ps_codec->i4_disp_strd, ps_codec->i4_disp_strd); + } + } + else if(IV_YUV_422P == ps_codec->e_chroma_fmt) + { + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + ihevcd_fmt_conv_422sp_to_422p(pu1_y_src, pu1_uv_src, + pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, ps_codec->i4_strd, + ps_codec->i4_disp_strd, (ps_codec->i4_disp_strd / 2)); } - fmt_conv_fptr(pu1_y_src, pu1_uv_src, - pu1_y_dst_tmp, pu1_uv_dst_tmp, - 
ps_codec->i4_disp_wd, - num_rows, - ps_codec->i4_strd, - ps_codec->i4_strd, - ps_codec->i4_disp_strd, - ps_codec->i4_disp_strd); } else if(IV_YUV_420P == ps_codec->e_chroma_fmt) { - ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr; - if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_MONOCHROME) { - fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr; + ihevcd_fmt_conv_400_to_420p(pu1_y_src, + pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, + ps_codec->i4_disp_strd, (ps_codec->i4_disp_strd / 2)); } - else + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) { - fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p; - } - - if(0 == disable_luma_copy) - { - // copy luma - WORD32 i; - WORD32 num_cols = ps_codec->i4_disp_wd; - - for(i = 0; i < num_rows; i++) + ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr; + if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) { - memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols); - pu1_y_dst_tmp += ps_codec->i4_disp_strd; - pu1_y_src += ps_codec->i4_strd; + fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr; + } + else + { + fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p; } - disable_luma_copy = 1; + if(0 == disable_luma_copy) + { + // copy luma + WORD32 i; + WORD32 num_cols = ps_codec->i4_disp_wd; + + for(i = 0; i < num_rows; i++) + { + memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols); + pu1_y_dst_tmp += ps_codec->i4_disp_strd; + pu1_y_src += ps_codec->i4_strd; + } + + disable_luma_copy = 1; + } + fmt_conv_fptr(pu1_y_src, pu1_uv_src, + pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, ps_codec->i4_strd, + ps_codec->i4_disp_strd, (ps_codec->i4_disp_strd / 2), + is_u_first, + disable_luma_copy); + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + ihevcd_fmt_conv_444sp_to_420p(pu1_y_src, pu1_uv_src, + pu1_y_dst_tmp, 
pu1_u_dst_tmp, pu1_v_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, src_chroma_row_stride, + ps_codec->i4_disp_strd, ps_codec->i4_disp_strd / 2); + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + ihevcd_fmt_conv_422sp_to_420p(pu1_y_src, pu1_uv_src, + pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, + ps_codec->i4_disp_wd, num_rows, + ps_codec->i4_strd, src_chroma_row_stride, + ps_codec->i4_disp_strd, ps_codec->i4_disp_strd / 2); } - fmt_conv_fptr(pu1_y_src, pu1_uv_src, - pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp, - ps_codec->i4_disp_wd, - num_rows, - ps_codec->i4_strd, - ps_codec->i4_strd, - ps_codec->i4_disp_strd, - (ps_codec->i4_disp_strd / 2), - is_u_first, - disable_luma_copy); } - else if(IV_RGB_565 == ps_codec->e_chroma_fmt) - { - ihevcd_fmt_conv_420sp_to_rgb565_ft *fmt_conv_fptr; - if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) - { - fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr; - } - else - { - fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgb565; - } - - fmt_conv_fptr(pu1_y_src, pu1_uv_src, - pu2_rgb_dst_tmp, - ps_codec->i4_disp_wd, - num_rows, - ps_codec->i4_strd, - ps_codec->i4_strd, - ps_codec->i4_disp_strd, - is_u_first); - } - else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt) - { - ihevcd_fmt_conv_420sp_to_rgba8888_ft *fmt_conv_fptr; - if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH) - { - fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr; - } - else - { - fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgba8888; - } - - ASSERT(is_u_first == 1); - fmt_conv_fptr(pu1_y_src, - pu1_uv_src, - pu4_rgb_dst_tmp, - ps_codec->i4_disp_wd, - num_rows, - ps_codec->i4_strd, - ps_codec->i4_strd, - ps_codec->i4_disp_strd, - is_u_first); - } - - - } return (ret); } diff --git a/decoder/ihevcd_fmt_conv.h b/decoder/ihevcd_fmt_conv.h index e099218..0e4b680 100644 --- a/decoder/ihevcd_fmt_conv.h +++ b/decoder/ihevcd_fmt_conv.h @@ -43,27 +43,6 @@ #define COEFF3 -6664 #define 
COEFF4 16530 -typedef void ihevcd_fmt_conv_420sp_to_rgba8888_ft(UWORD8 *pu1_y_src, - UWORD8 *pu1_uv_src, - UWORD32 *pu4_rgba_dst, - WORD32 wd, - WORD32 ht, - WORD32 src_y_strd, - WORD32 src_uv_strd, - WORD32 dst_strd, - WORD32 is_u_first); - -typedef void ihevcd_fmt_conv_420sp_to_rgb565_ft(UWORD8 *pu1_y_src, - UWORD8 *pu1_uv_src, - UWORD16 *pu2_rgb_dst, - WORD32 wd, - WORD32 ht, - WORD32 src_y_strd, - WORD32 src_uv_strd, - WORD32 dst_strd, - WORD32 is_u_first); - - typedef void ihevcd_fmt_conv_420sp_to_420sp_ft(UWORD8 *pu1_y_src, UWORD8 *pu1_uv_src, UWORD8 *pu1_y_dst, @@ -87,20 +66,28 @@ typedef void ihevcd_fmt_conv_420sp_to_420p_ft(UWORD8 *pu1_y_src, WORD32 dst_uv_strd, WORD32 is_u_first, WORD32 disable_luma_copy); +typedef void ihevcd_fmt_conv_444sp_to_444p_ft(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd); /* C function declarations */ -ihevcd_fmt_conv_420sp_to_rgba8888_ft ihevcd_fmt_conv_420sp_to_rgba8888; -ihevcd_fmt_conv_420sp_to_rgb565_ft ihevcd_fmt_conv_420sp_to_rgb565; ihevcd_fmt_conv_420sp_to_420sp_ft ihevcd_fmt_conv_420sp_to_420sp; ihevcd_fmt_conv_420sp_to_420p_ft ihevcd_fmt_conv_420sp_to_420p; +ihevcd_fmt_conv_444sp_to_444p_ft ihevcd_fmt_conv_444sp_to_444p; /* A9Q function declarations */ -ihevcd_fmt_conv_420sp_to_rgba8888_ft ihevcd_fmt_conv_420sp_to_rgba8888_a9q; ihevcd_fmt_conv_420sp_to_420sp_ft ihevcd_fmt_conv_420sp_to_420sp_a9q; ihevcd_fmt_conv_420sp_to_420p_ft ihevcd_fmt_conv_420sp_to_420p_a9q; /* A9A function declarations */ -ihevcd_fmt_conv_420sp_to_rgba8888_ft ihevcd_fmt_conv_420sp_to_rgba8888_a9a; ihevcd_fmt_conv_420sp_to_420sp_ft ihevcd_fmt_conv_420sp_to_420sp_a9a; ihevcd_fmt_conv_420sp_to_420p_ft ihevcd_fmt_conv_420sp_to_420p_a9a; @@ -111,7 +98,6 @@ ihevcd_fmt_conv_420sp_to_420p_ft ihevcd_fmt_conv_420sp_to_420p_ssse3; ihevcd_fmt_conv_420sp_to_420p_ft 
ihevcd_fmt_conv_420sp_to_420p_sse42; /* armv8 function declarations */ -ihevcd_fmt_conv_420sp_to_rgba8888_ft ihevcd_fmt_conv_420sp_to_rgba8888_av8; ihevcd_fmt_conv_420sp_to_420sp_ft ihevcd_fmt_conv_420sp_to_420sp_av8; ihevcd_fmt_conv_420sp_to_420p_ft ihevcd_fmt_conv_420sp_to_420p_av8; diff --git a/decoder/ihevcd_function_selector.h b/decoder/ihevcd_function_selector.h index e7d7eee..53a3cd6 100644 --- a/decoder/ihevcd_function_selector.h +++ b/decoder/ihevcd_function_selector.h @@ -40,6 +40,7 @@ #include "ihevc_deblk.h" #include "ihevc_itrans.h" +#include "ihevc_itrans_res.h" #include "ihevc_itrans_recon.h" #include "ihevc_chroma_itrans_recon.h" #include "ihevc_chroma_intra_pred.h" @@ -71,18 +72,6 @@ #define D_ARCH_MIPS_GENERIC 15 #define D_ARCH_MIPS_32 16 -void ihevcd_init_arch(void *pv_codec); - -void ihevcd_init_function_ptr(void *pv_codec); - -void ihevcd_init_function_ptr_generic(void *pv_codec); -void ihevcd_init_function_ptr_ssse3(void *pv_codec); -void ihevcd_init_function_ptr_sse42(void *pv_codec); - -#ifndef DISABLE_AVX2 -void ihevcd_init_function_ptr_avx2(void *pv_codec); -#endif - typedef struct { ihevc_deblk_chroma_horz_ft *ihevc_deblk_chroma_horz_fptr; @@ -111,6 +100,7 @@ typedef struct ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; ihevc_intra_pred_luma_ref_subst_all_avlble_ft *ihevc_intra_pred_luma_ref_subst_all_avlble_fptr; ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr; + ihevc_intra_pred_chroma_ref_filtering_ft *ihevc_intra_pred_chroma_ref_filtering_fptr; ihevc_intra_pred_chroma_dc_ft *ihevc_intra_pred_chroma_dc_fptr; ihevc_intra_pred_chroma_horz_ft *ihevc_intra_pred_chroma_horz_fptr; ihevc_intra_pred_chroma_mode2_ft *ihevc_intra_pred_chroma_mode2_fptr; @@ -136,6 +126,11 @@ typedef struct ihevc_itrans_8x8_ft *ihevc_itrans_8x8_fptr; ihevc_itrans_16x16_ft *ihevc_itrans_16x16_fptr; ihevc_itrans_32x32_ft *ihevc_itrans_32x32_fptr; + ihevc_itrans_res_4x4_ttype1_ft 
*ihevc_itrans_res_4x4_ttype1_fptr; + ihevc_itrans_res_4x4_ft *ihevc_itrans_res_4x4_fptr; + ihevc_itrans_res_8x8_ft *ihevc_itrans_res_8x8_fptr; + ihevc_itrans_res_16x16_ft *ihevc_itrans_res_16x16_fptr; + ihevc_itrans_res_32x32_ft *ihevc_itrans_res_32x32_fptr; ihevc_itrans_recon_4x4_ttype1_ft *ihevc_itrans_recon_4x4_ttype1_fptr; ihevc_itrans_recon_4x4_ft *ihevc_itrans_recon_4x4_fptr; ihevc_itrans_recon_8x8_ft *ihevc_itrans_recon_8x8_fptr; @@ -144,6 +139,7 @@ typedef struct ihevc_chroma_itrans_recon_4x4_ft *ihevc_chroma_itrans_recon_4x4_fptr; ihevc_chroma_itrans_recon_8x8_ft *ihevc_chroma_itrans_recon_8x8_fptr; ihevc_chroma_itrans_recon_16x16_ft *ihevc_chroma_itrans_recon_16x16_fptr; + ihevc_chroma_itrans_recon_32x32_ft *ihevc_chroma_itrans_recon_32x32_fptr; ihevc_recon_4x4_ttype1_ft *ihevc_recon_4x4_ttype1_fptr; ihevc_recon_4x4_ft *ihevc_recon_4x4_fptr; ihevc_recon_8x8_ft *ihevc_recon_8x8_fptr; @@ -152,6 +148,7 @@ typedef struct ihevc_chroma_recon_4x4_ft *ihevc_chroma_recon_4x4_fptr; ihevc_chroma_recon_8x8_ft *ihevc_chroma_recon_8x8_fptr; ihevc_chroma_recon_16x16_ft *ihevc_chroma_recon_16x16_fptr; + ihevc_chroma_recon_32x32_ft *ihevc_chroma_recon_32x32_fptr; ihevc_memcpy_mul_8_ft *ihevc_memcpy_mul_8_fptr; ihevc_memcpy_ft *ihevc_memcpy_fptr; ihevc_memset_mul_8_ft *ihevc_memset_mul_8_fptr; @@ -178,12 +175,36 @@ typedef struct ihevc_sao_edge_offset_class2_chroma_ft *ihevc_sao_edge_offset_class2_chroma_fptr; ihevc_sao_edge_offset_class3_ft *ihevc_sao_edge_offset_class3_fptr; ihevc_sao_edge_offset_class3_chroma_ft *ihevc_sao_edge_offset_class3_chroma_fptr; - ihevcd_fmt_conv_420sp_to_rgba8888_ft *ihevcd_fmt_conv_420sp_to_rgba8888_fptr; - ihevcd_fmt_conv_420sp_to_rgb565_ft *ihevcd_fmt_conv_420sp_to_rgb565_fptr; ihevcd_fmt_conv_420sp_to_420sp_ft *ihevcd_fmt_conv_420sp_to_420sp_fptr; ihevcd_fmt_conv_420sp_to_420p_ft *ihevcd_fmt_conv_420sp_to_420p_fptr; + ihevcd_fmt_conv_444sp_to_444p_ft *ihevcd_fmt_conv_444sp_to_444p_fptr; ihevcd_itrans_recon_dc_luma_ft 
*ihevcd_itrans_recon_dc_luma_fptr; ihevcd_itrans_recon_dc_chroma_ft *ihevcd_itrans_recon_dc_chroma_fptr; + ihevcd_itrans_res_dc_ft *ihevcd_itrans_res_dc_fptr; }func_selector_t; +void ihevcd_init_arch(void *pv_codec); + +void ihevcd_init_function_ptr(void *pv_codec); + +#ifdef __cplusplus +extern "C" { +#endif +void ihevcd_init_function_ptr_generic(func_selector_t *ps_func_selector); +void ihevcd_init_function_ptr_ssse3(func_selector_t *ps_func_selector); +void ihevcd_init_function_ptr_sse42(func_selector_t *ps_func_selector); + +#ifndef DISABLE_AVX2 +void ihevcd_init_function_ptr_avx2(func_selector_t *ps_func_selector); +#endif + +void ihevcd_init_function_ptr_neonintr(func_selector_t *ps_func_selector); +void ihevcd_init_function_ptr_noneon(func_selector_t *ps_func_selector); +void ihevcd_init_function_ptr_a9q(func_selector_t *ps_func_selector); +void ihevcd_init_function_ptr_av8(func_selector_t *ps_func_selector); + +#ifdef __cplusplus +} +#endif + #endif /* _IHEVCD_FUNCTION_SELECTOR_H_ */ diff --git a/decoder/ihevcd_ilf_padding.c b/decoder/ihevcd_ilf_padding.c index 9db82e5..7edbfa9 100644 --- a/decoder/ihevcd_ilf_padding.c +++ b/decoder/ihevcd_ilf_padding.c @@ -154,8 +154,7 @@ void ihevcd_ilf_pad_frame(deblk_ctxt_t *ps_deblk_ctxt, sao_ctxt_t *ps_sao_ctxt) { UWORD8 *pu1_cur_ctb_luma = ps_deblk_ctxt->pu1_cur_pic_luma + (i4_ctb_x * ctb_size - + i4_ctb_y * ctb_size - * ps_codec->i4_strd); + + i4_ctb_y * ctb_size * ps_codec->i4_strd); UWORD8 *pu1_cur_ctb_chroma = ps_deblk_ctxt->pu1_cur_pic_chroma + i4_ctb_x * ctb_size + (i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); @@ -163,46 +162,56 @@ void ihevcd_ilf_pad_frame(deblk_ctxt_t *ps_deblk_ctxt, sao_ctxt_t *ps_sao_ctxt) if(0 == i4_ctb_x) { WORD32 pad_ht_luma; - WORD32 pad_ht_chroma; pad_ht_luma = ctb_size; pad_ht_luma += (ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y ? 8 : 0; - pad_ht_chroma = ctb_size / 2; - pad_ht_chroma += (ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y ? 
8 : 0; /* Pad left after 1st CTB is processed */ ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(pu1_cur_ctb_luma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(pu1_cur_ctb_chroma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + WORD32 pad_ht_chroma = ctb_size / 2; + pad_ht_chroma += (ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y ? 8 : 0; + ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(pu1_cur_ctb_chroma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + } } else if((ps_sps->i2_pic_wd_in_ctb - 1) == i4_ctb_x) { WORD32 pad_ht_luma; - WORD32 pad_ht_chroma; WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (i4_ctb_x << ps_sps->i1_log2_ctb_size); pad_ht_luma = ctb_size; pad_ht_luma += (ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y ? 8 : 0; - pad_ht_chroma = ctb_size / 2; - pad_ht_chroma += (ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y ? 8 : 0; /* Pad right after last CTB in the current row is processed */ ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(pu1_cur_ctb_luma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT); - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(pu1_cur_ctb_chroma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); - + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + WORD32 pad_ht_chroma = ctb_size / 2; + pad_ht_chroma += (ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y ? 
8 : 0; + ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(pu1_cur_ctb_chroma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); + } if((ps_sps->i2_pic_ht_in_ctb - 1) == i4_ctb_y) { UWORD8 *pu1_buf; + /* Since SAO is shifted by 8x8, chroma padding can not be done till second row is processed */ /* Hence moving top padding to to end of frame, Moving it to second row also results in problems when there is only one row */ /* Pad top after padding left and right for current rows after processing 1st CTB row */ ihevc_pad_top(ps_deblk_ctxt->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP); - ihevc_pad_top(ps_deblk_ctxt->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ihevc_pad_top(ps_deblk_ctxt->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); + } pu1_buf = ps_deblk_ctxt->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT; /* Pad top after padding left and right for current rows after processing 1st CTB row */ ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT); - pu1_buf = ps_deblk_ctxt->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; - ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + pu1_buf = ps_deblk_ctxt->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; + ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); + } } } } diff --git a/decoder/ihevcd_inter_pred.c b/decoder/ihevcd_inter_pred.c index 8e3fe77..6082a3f 100644 --- 
a/decoder/ihevcd_inter_pred.c +++ b/decoder/ihevcd_inter_pred.c @@ -70,25 +70,6 @@ #include "ihevc_inter_pred.h" #include "ihevcd_profile.h" -static WORD8 gai1_luma_filter[4][NTAPS_LUMA] = -{ - { 0, 0, 0, 64, 0, 0, 0, 0 }, - { -1, 4, -10, 58, 17, -5, 1, 0 }, - { -1, 4, -11, 40, 40, -11, 4, -1 }, - { 0, 1, -5, 17, 58, -10, 4, -1 } }; - -/* The filter uses only the first four elements in each array */ -static WORD8 gai1_chroma_filter[8][NTAPS_LUMA] = -{ - { 0, 64, 0, 0, 0, 0, 0, 0 }, - { -2, 58, 10, -2, 0, 0, 0, 0 }, - { -4, 54, 16, -2, 0, 0, 0, 0 }, - { -6, 46, 28, -4, 0, 0, 0, 0 }, - { -4, 36, 36, -4, 0, 0, 0, 0 }, - { -4, 28, 46, -6, 0, 0, 0, 0 }, - { -2, 16, 54, -4, 0, 0, 0, 0 }, - { -2, 10, 58, -2, 0, 0, 0, 0 } }; - /** ******************************************************************************* * @@ -163,6 +144,10 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) WORD32 next_ctb_idx; WORD8(*coeff)[8]; WORD32 chroma_yuv420sp_vu; + WORD32 num_comp; + WORD32 h_samp_factor, v_samp_factor; + WORD32 chroma_pixel_strd = 2; + WORD32 is_yuv420, is_yuv444; PROFILE_DISABLE_INTER_PRED(); ps_codec = ps_proc->ps_codec; @@ -211,6 +196,12 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) chroma_offset_l1_cb = 0; chroma_offset_l1_cr = 0; + num_comp = ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_MONOCHROME ? 1 : 2; + is_yuv420 = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 1 : 0; + is_yuv444 = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 0; + h_samp_factor = is_yuv444 ? 1 : 2; + v_samp_factor = is_yuv420 ? 
2 : 1; + for(pu_indx = 0; pu_indx < i4_pu_cnt; pu_indx++, ps_pu++) { /* If the PU is intra then proceed to the next */ @@ -233,15 +224,20 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf)); ref_pic_luma_l0 = ps_pic_buf_l0->pu1_luma; - ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma; luma_weight_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[ps_pu->mv.i1_l0_ref_idx]; - chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx]; - chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx]; luma_offset_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[ps_pu->mv.i1_l0_ref_idx]; - chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx]; - chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx]; + + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma; + chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx]; + chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx]; + + chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx]; + chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx]; + } } if(ps_pu->b2_pred_mode != PRED_L0) @@ -249,19 +245,24 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) pic_buf_t *ps_pic_buf_l1; ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf)); ref_pic_luma_l1 = ps_pic_buf_l1->pu1_luma; - ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma; luma_weight_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[ps_pu->mv.i1_l1_ref_idx]; - chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx]; - chroma_weight_l1_cr = 
ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx]; luma_offset_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[ps_pu->mv.i1_l1_ref_idx]; - chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx]; - chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx]; + + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma; + chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx]; + chroma_weight_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx]; + + chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx]; + chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx]; + } } /*luma and chroma components*/ - for(clr_indx = 0; clr_indx < 2; clr_indx++) + for(clr_indx = 0; clr_indx < num_comp; clr_indx++) { PROFILE_DISABLE_INTER_PRED_LUMA(clr_indx); PROFILE_DISABLE_INTER_PRED_CHROMA(clr_indx); @@ -308,7 +309,7 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) pu1_dst = pu1_dst_luma + pu_y * ref_strd + pu_x; ntaps = NTAPS_LUMA; - coeff = gai1_luma_filter; + coeff = gai1_ihevc_luma_filter; } else @@ -319,15 +320,14 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) if(ps_pu->b2_pred_mode != PRED_L1) { mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2)); - ai2_xint[0] = (pu_x / 2 + (mv >> 3)) << 1; - ai2_xfrac[0] = mv & 7; + ai2_xint[0] = (pu_x * chroma_pixel_strd / h_samp_factor) + (mv >> (2 + h_samp_factor - 1)) * chroma_pixel_strd; + ai2_xfrac[0] = mv & (is_yuv420 ? 
7 : 3); mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2)); - ai2_yint[0] = pu_y / 2 + (mv >> 3); - ai2_yfrac[0] = mv & 7; + ai2_yint[0] = ((pu_y / v_samp_factor) + (mv >> (2 + v_samp_factor - 1))); + ai2_yfrac[0] = mv & (is_yuv444 ? 3 : 7); - ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * ref_strd - + ai2_xint[0]; + ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * (ref_strd * chroma_pixel_strd / h_samp_factor) + ai2_xint[0]; ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask; ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask; @@ -337,24 +337,25 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) if(ps_pu->b2_pred_mode != PRED_L0) { mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2)); - ai2_xint[1] = (pu_x / 2 + (mv >> 3)) << 1; - ai2_xfrac[1] = mv & 7; + ai2_xint[1] = (pu_x * chroma_pixel_strd / h_samp_factor) + (mv >> (2 + h_samp_factor - 1)) * chroma_pixel_strd; + ai2_xfrac[1] = mv & (is_yuv420 ? 7 : 3); mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2)); - ai2_yint[1] = pu_y / 2 + (mv >> 3); - ai2_yfrac[1] = mv & 7; + ai2_yint[1] = ((pu_y / v_samp_factor) + (mv >> (2 + v_samp_factor - 1))); + ai2_yfrac[1] = mv & (is_yuv444 ? 
3 : 7); + + ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * (ref_strd * chroma_pixel_strd / h_samp_factor) + ai2_xint[1]; - ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * ref_strd - + ai2_xint[1]; ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask; ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask; } - pu1_dst = pu1_dst_chroma + pu_y * ref_strd / 2 + pu_x; + pu1_dst = pu1_dst_chroma + (pu_y / v_samp_factor) * (ref_strd * chroma_pixel_strd / h_samp_factor) + + (pu_x * chroma_pixel_strd / h_samp_factor); ntaps = NTAPS_CHROMA; - coeff = gai1_chroma_filter; + coeff = gai1_ihevc_chroma_filter; } if(ps_pu->b2_pred_mode != PRED_L1) @@ -396,6 +397,10 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) if(func_ptr1 != NULL) { func_src_strd = ref_strd; + if (clr_indx != 0) + { + func_src_strd *= (chroma_pixel_strd / h_samp_factor); + } func_src = (ai2_xfrac[0] && ai2_yfrac[0]) ? ref_pic_l0 - (ntaps / 2 - 1) * func_src_strd : ref_pic_l0; @@ -409,10 +414,14 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) func_dst_strd = (weighted_pred || bi_pred || (ai2_xfrac[0] && ai2_yfrac[0])) ? pu_wd : ref_strd; - func_coeff = ai2_xfrac[0] ? - coeff[ai2_xfrac[0]] : coeff[ai2_yfrac[0]]; - func_wd = pu_wd >> clr_indx; - func_ht = pu_ht >> clr_indx; + if (clr_indx != 0) + { + func_dst_strd *= (chroma_pixel_strd / h_samp_factor); + } + func_coeff = ai2_xfrac[0] ? coeff[ai2_xfrac[0] << (is_yuv444 ? clr_indx : 0)] + : coeff[ai2_yfrac[0] << (is_yuv444 ? clr_indx : 0)]; + func_wd = pu_wd >> (is_yuv420 ? clr_indx : 0); + func_ht = pu_ht >> (is_yuv444 ? 0 : clr_indx); func_ht += (ai2_xfrac[0] && ai2_yfrac[0]) ? 
ntaps - 1 : 0; func_ptr1(func_src, func_dst, func_src_strd, func_dst_strd, func_coeff, func_ht, func_wd); @@ -422,15 +431,23 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) if(func_ptr2 != NULL) { func_src_strd = pu_wd; + if (clr_indx != 0) + { + func_src_strd *= (chroma_pixel_strd / h_samp_factor); + } func_src = pi2_tmp1 + (ntaps / 2 - 1) * func_src_strd; func_dst = (weighted_pred || bi_pred) ? (void *)pi2_tmp1 : (void *)pu1_dst; func_dst_strd = (weighted_pred || bi_pred) ? pu_wd : ref_strd; - func_coeff = coeff[ai2_yfrac[0]]; - func_wd = pu_wd >> clr_indx; - func_ht = pu_ht >> clr_indx; + if (clr_indx != 0) + { + func_dst_strd *= (chroma_pixel_strd / h_samp_factor); + } + func_coeff = coeff[ai2_yfrac[0] << (is_yuv444 ? clr_indx : 0)]; + func_wd = pu_wd >> (is_yuv420 ? clr_indx : 0); + func_ht = pu_ht >> (is_yuv444 ? 0 : clr_indx); func_ptr2(func_src, func_dst, func_src_strd, func_dst_strd, func_coeff, func_ht, func_wd); } @@ -438,6 +455,10 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) if(func_ptr3 != NULL) { func_src_strd = ref_strd; + if (clr_indx != 0) + { + func_src_strd *= (chroma_pixel_strd / h_samp_factor); + } func_src = (ai2_xfrac[1] && ai2_yfrac[1]) ? ref_pic_l1 - (ntaps / 2 - 1) * func_src_strd : ref_pic_l1; @@ -451,28 +472,42 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) func_dst_strd = (weighted_pred || bi_pred || (ai2_xfrac[1] && ai2_yfrac[1])) ? pu_wd : ref_strd; - func_coeff = ai2_xfrac[1] ? - coeff[ai2_xfrac[1]] : coeff[ai2_yfrac[1]]; - func_wd = pu_wd >> clr_indx; - func_ht = pu_ht >> clr_indx; + if (clr_indx != 0) + { + func_dst_strd *= (chroma_pixel_strd / h_samp_factor); + } + func_coeff = ai2_xfrac[1] ? coeff[ai2_xfrac[1] << (is_yuv444 ? clr_indx : 0)] + : coeff[ai2_yfrac[1] << (is_yuv444 ? clr_indx : 0)]; + func_wd = pu_wd >> (is_yuv420 ? clr_indx : 0); + func_ht = pu_ht >> (is_yuv444 ? 0 : clr_indx); + func_ht += (ai2_xfrac[1] && ai2_yfrac[1]) ? 
ntaps - 1 : 0; func_ptr3(func_src, func_dst, func_src_strd, func_dst_strd, func_coeff, func_ht, func_wd); + } if(func_ptr4 != NULL) { func_src_strd = pu_wd; + if (clr_indx != 0) + { + func_src_strd *= (chroma_pixel_strd / h_samp_factor); + } func_src = pi2_tmp2 + (ntaps / 2 - 1) * func_src_strd; func_dst = (weighted_pred || bi_pred) ? (void *)pi2_tmp2 : (void *)pu1_dst; func_dst_strd = (weighted_pred || bi_pred) ? pu_wd : ref_strd; - func_coeff = coeff[ai2_yfrac[1]]; - func_wd = pu_wd >> clr_indx; - func_ht = pu_ht >> clr_indx; + if (clr_indx != 0) + { + func_dst_strd *= (chroma_pixel_strd / h_samp_factor); + } + func_coeff = coeff[ai2_yfrac[1] << (is_yuv444 ? clr_indx : 0)]; + func_wd = pu_wd >> (is_yuv420 ? clr_indx : 0); + func_ht = pu_ht >> (is_yuv444 ? 0 : clr_indx); func_ptr4(func_src, func_dst, func_src_strd, func_dst_strd, func_coeff, func_ht, func_wd); @@ -518,15 +553,17 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) { shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom + SHIFT_14_MINUS_BIT_DEPTH + 1; + func_src_strd = pu_wd * (chroma_pixel_strd / h_samp_factor); + func_dst_strd = ref_strd * (chroma_pixel_strd / h_samp_factor); if(chroma_yuv420sp_vu) { ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1, pi2_tmp2, pu1_dst, - pu_wd, - pu_wd, - ref_strd, + func_src_strd, + func_src_strd, + func_dst_strd, chroma_weight_l0_cr, chroma_weight_l0_cb, chroma_offset_l0_cr, @@ -538,17 +575,17 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) shift, lvl_shift1, lvl_shift2, - pu_ht >> 1, - pu_wd >> 1); + pu_ht >> (is_yuv444 ? 0 : clr_indx), + pu_wd >> (is_yuv420 ? 
clr_indx : 0)); } else { ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1, pi2_tmp2, pu1_dst, - pu_wd, - pu_wd, - ref_strd, + func_src_strd, + func_src_strd, + func_dst_strd, chroma_weight_l0_cb, chroma_weight_l0_cr, chroma_offset_l0_cb, @@ -560,8 +597,8 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) shift, lvl_shift1, lvl_shift2, - pu_ht >> 1, - pu_wd >> 1); + pu_ht >> (is_yuv444 ? 0 : clr_indx), + pu_wd >> (is_yuv420 ? clr_indx : 0)); } } } @@ -600,36 +637,38 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) { shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom + SHIFT_14_MINUS_BIT_DEPTH; + func_src_strd = pu_wd * (chroma_pixel_strd / h_samp_factor); + func_dst_strd = ref_strd * (chroma_pixel_strd / h_samp_factor); if(chroma_yuv420sp_vu) { ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2, pu1_dst, - pu_wd, - ref_strd, + func_src_strd, + func_dst_strd, ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr, ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb, ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr, ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb, shift, lvl_shift1, - pu_ht >> 1, - pu_wd >> 1); + pu_ht >> (is_yuv444 ? 0 : clr_indx), + pu_wd >> (is_yuv420 ? clr_indx : 0)); } else { ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2, pu1_dst, - pu_wd, - ref_strd, + func_src_strd, + func_dst_strd, ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb, ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr, ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb, ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr, shift, lvl_shift1, - pu_ht >> 1, - pu_wd >> 1); + pu_ht >> (is_yuv444 ? 
0 : clr_indx), + pu_wd >> (is_yuv420 ? clr_indx : 0)); } } } @@ -638,26 +677,33 @@ void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) { lvl_shift1 = 0; lvl_shift2 = 0; + if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0])) lvl_shift1 = (1 << 13); if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1])) lvl_shift2 = (1 << 13); - if(clr_indx != 0) + func_src_strd = pu_wd; + func_dst_strd = ref_strd; + if (clr_indx != 0) { - pu_ht = (pu_ht >> 1); + func_src_strd *= (chroma_pixel_strd / h_samp_factor); + func_dst_strd *= (chroma_pixel_strd / h_samp_factor); } + func_ht = pu_ht >> (is_yuv444 ? 0 : clr_indx); + func_wd = pu_wd << (is_yuv444 ? clr_indx : 0); + ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr(pi2_tmp1, pi2_tmp2, pu1_dst, - pu_wd, - pu_wd, - ref_strd, + func_src_strd, + func_src_strd, + func_dst_strd, lvl_shift1, lvl_shift2, - pu_ht, - pu_wd); + func_ht, + func_wd); } } diff --git a/decoder/ihevcd_iquant_itrans_recon_ctb.c b/decoder/ihevcd_iquant_itrans_recon_ctb.c index 6af3001..eb96ff6 100644 --- a/decoder/ihevcd_iquant_itrans_recon_ctb.c +++ b/decoder/ihevcd_iquant_itrans_recon_ctb.c @@ -80,8 +80,6 @@ #include "ihevcd_statistics.h" #include "ihevcd_itrans_recon_dc.h" -static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; - /* Globals */ static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] = @@ -132,9 +130,93 @@ const WORD16 *g_ai2_ihevc_trans_tables[] = }; +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ +/** + * Structure to hold fields required for iq it recon construction process + */ +typedef struct +{ + /* + * parsed transform coeffs + */ + WORD16 *pi2_tu_coeff; + + /** + * pred buffer + */ + UWORD8 *pu1_pred; + + /** + * recon buffer + */ + UWORD8 *pu1_dst; + + /** + * transform coeffs buffer stride + */ + WORD32 tu_coeff_stride; + + /** + * pred buffer 
stride + */ + WORD32 pred_strd; + + /** + * recon buffer stride + */ + WORD32 dst_strd; + + /** + * zero cols, zero rows for optimizing itrans process + */ + UWORD32 zero_cols; + UWORD32 zero_rows; + + /** + * dc only? for optimizing itrans process + */ + UWORD32 coeff_type; + WORD16 coeff_value; + + /** + * cbf + */ + UWORD8 cbf; + + /** + * is transform skip + */ + UWORD8 transform_skip_flag; + +#ifdef ENABLE_MAIN_REXT_PROFILE + /** + * is explicit rdpcm enabled + */ + UWORD8 explicit_rdpcm_flag; + + /** + * explicit rdpcm dir + */ + UWORD8 explicit_rdpcm_dir; +#endif + +} tu_plane_iq_it_recon_ctxt_t; + + /*****************************************************************************/ /* Function Prototypes */ /*****************************************************************************/ +typedef void (*PF_IQITRECON_PLANE)(process_ctxt_t *ps_proc, + tu_t *ps_tu, + tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, + WORD32 func_idx, + WORD32 log2_trans_size, + CHROMA_PLANE_ID_T chroma_plane, + WORD8 intra_flag, + WORD8 intra_pred_mode); + /* Returns number of ai2_level read from ps_sblk_coeff */ UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff, WORD32 log2_trans_size, @@ -178,7 +260,11 @@ UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff, u1_scan_type = *pu1_tu_coeff_data++; /* 0th bit has trans_skip */ trans_skip = u1_scan_type & 1; +#ifdef ENABLE_MAIN_REXT_PROFILE + u1_scan_type = (u1_scan_type & 0xF) >> 1; +#else u1_scan_type >>= 1; +#endif pi2_sblk_ptr = pi2_tu_coeff; @@ -527,6 +613,210 @@ WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc, } +static void ihevcd_iquant_itrans_recon_tu_plane(process_ctxt_t *ps_proc, + tu_t *ps_tu, + tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, + WORD32 func_idx, + WORD32 log2_trans_size, + CHROMA_PLANE_ID_T chroma_plane, + WORD8 intra_flag, + WORD8 intra_pred_mode) +{ + sps_t *ps_sps = ps_proc->ps_sps; + pps_t *ps_pps = ps_proc->ps_pps; + codec_t *ps_codec = ps_proc->ps_codec; + + if(1 == ps_pl_tu_ctxt->cbf) + { + 
if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag) + { + /* Recon */ + ps_codec->apf_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff, ps_pl_tu_ctxt->pu1_pred, + ps_pl_tu_ctxt->pu1_dst, ps_pl_tu_ctxt->tu_coeff_stride, + ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, + ps_pl_tu_ctxt->zero_cols); + } + else + { + /* iQuant , iTrans and Recon */ + if((0 == ps_pl_tu_ctxt->coeff_type)) + { + ps_codec->apf_itrans_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff, + ps_proc->pi2_itrans_intrmd_buf, + ps_pl_tu_ctxt->pu1_pred, + ps_pl_tu_ctxt->pu1_dst, + ps_pl_tu_ctxt->tu_coeff_stride, + ps_pl_tu_ctxt->pred_strd, + ps_pl_tu_ctxt->dst_strd, + ps_pl_tu_ctxt->zero_cols, + ps_pl_tu_ctxt->zero_rows); + } + else /* DC only */ + { + ps_codec->apf_itrans_recon_dc[chroma_plane != NULL_PLANE]( + ps_pl_tu_ctxt->pu1_pred, ps_pl_tu_ctxt->pu1_dst, + ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, log2_trans_size, + ps_pl_tu_ctxt->coeff_value); + } + } + } +} + +#ifdef ENABLE_MAIN_REXT_PROFILE +static void ihevcd_iquant_itrans_resi_recon_tu_plane(process_ctxt_t *ps_proc, + tu_t *ps_tu, + tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, + WORD32 func_idx, + WORD32 log2_trans_size, + CHROMA_PLANE_ID_T chroma_plane, + WORD8 intra_flag, + WORD8 intra_pred_mode) +{ + sps_t *ps_sps = ps_proc->ps_sps; + pps_t *ps_pps = ps_proc->ps_pps; + codec_t *ps_codec = ps_proc->ps_codec; + WORD8 trans_size = 1 << log2_trans_size; + WORD16 *pi2_res = ps_proc->pi2_res_luma_buf; + WORD16 *pi2_res_uv = ps_proc->pi2_res_chroma_buf; + WORD32 alpha = 0; + WORD16 *residue_out_base = chroma_plane == NULL_PLANE ? 
pi2_res : pi2_res_uv; + WORD16 *residue_out = residue_out_base; + // if both rdpcm and rotate are to be applied, share the output residue buffer between the + // two transforms + WORD16 *residue_out_intrmdt = residue_out_base + (TRANS_SIZE_4 * TRANS_SIZE_4); + + if(chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0) + { + alpha = (1 << (ps_tu->b3_cb_log2_res_scale_abs_plus1 - 1)) + * (1 - 2 * ps_tu->b1_cb_log2_res_sign); + } + else if(chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0) + { + alpha = (1 << (ps_tu->b3_cr_log2_res_scale_abs_plus1 - 1)) + * (1 - 2 * ps_tu->b1_cr_log2_res_sign); + } + if(1 == ps_pl_tu_ctxt->cbf) + { + if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag) + { + WORD8 rotate = ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4 + && intra_flag; + WORD8 rdpcm = (ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag + && (intra_pred_mode == 10 || intra_pred_mode == 26)) + || ps_pl_tu_ctxt->explicit_rdpcm_flag; + WORD16 *src_residue = ps_pl_tu_ctxt->pi2_tu_coeff; + WORD16 src_residue_strd = ps_pl_tu_ctxt->tu_coeff_stride; + + if(rotate) + { + ihevc_res_4x4_rotate(src_residue, rdpcm ? residue_out_intrmdt : residue_out, + src_residue_strd, trans_size, ps_pl_tu_ctxt->zero_cols); + ps_pl_tu_ctxt->zero_cols = + gau4_ihevcd_4_bit_reverse[ps_pl_tu_ctxt->zero_cols & 0xF]; + src_residue = residue_out_intrmdt; + src_residue_strd = trans_size; + } + + if(rdpcm) + { + WORD8 rdpcm_dir = + ps_pl_tu_ctxt->explicit_rdpcm_flag ? 
+ ps_pl_tu_ctxt->explicit_rdpcm_dir : + intra_pred_mode != 10; + if(rdpcm_dir == 0) + { + ihevc_res_nxn_rdpcm_horz(src_residue, residue_out, src_residue_strd, trans_size, + trans_size, ps_pl_tu_ctxt->zero_cols); + ps_pl_tu_ctxt->zero_cols = (1 << CTZ(~ps_pl_tu_ctxt->zero_cols)) - 1; + } + else + { + ihevc_res_nxn_rdpcm_vert(src_residue, residue_out, src_residue_strd, trans_size, + trans_size, ps_pl_tu_ctxt->zero_cols); + } + } + + if(!rdpcm && !rotate) + { + ihevc_res_nxn_copy(src_residue, residue_out, src_residue_strd, trans_size, + trans_size, ps_pl_tu_ctxt->zero_cols); + } + } + else + { + /* iQuant, iTrans */ + if(0 == ps_pl_tu_ctxt->coeff_type) + { + WORD32 func_tmp_idx = chroma_plane != NULL_PLANE ? func_idx - 4 : func_idx; + ps_codec->apf_itrans_res[func_tmp_idx](ps_pl_tu_ctxt->pi2_tu_coeff, + ps_proc->pi2_itrans_intrmd_buf, residue_out, + ps_pl_tu_ctxt->tu_coeff_stride, trans_size, + ps_pl_tu_ctxt->zero_cols, + ps_pl_tu_ctxt->zero_rows); + } + else /* DC only */ + { + ps_codec->apf_itrans_res_dc(residue_out, trans_size, log2_trans_size, + ps_pl_tu_ctxt->coeff_value); + } + ps_pl_tu_ctxt->zero_cols = 0; + } + if(!alpha) + { + ps_codec->apf_recon[func_idx](residue_out, ps_pl_tu_ctxt->pu1_pred, + ps_pl_tu_ctxt->pu1_dst, trans_size, + ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, + ps_pl_tu_ctxt->zero_cols); + } + } + if(alpha) + { + if(0 == ps_pl_tu_ctxt->cbf) + { + memset(residue_out, 0, trans_size * trans_size * sizeof(WORD16)); + } + ihevc_chroma_recon_nxn_ccp(pi2_res, pi2_res_uv, ps_pl_tu_ctxt->pu1_pred, + ps_pl_tu_ctxt->pu1_dst, alpha, trans_size, trans_size, + trans_size, ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd); + } +} + +PF_IQITRECON_PLANE get_iqitrec_func(process_ctxt_t *ps_proc, + tu_t *ps_tu, + tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt, + WORD32 log2_trans_size, + CHROMA_PLANE_ID_T chroma_plane, + WORD8 intra_flag, + WORD8 intra_pred_mode) +{ + sps_t *ps_sps = ps_proc->ps_sps; + pps_t *ps_pps = ps_proc->ps_pps; + WORD8 trans_size = 1 
<< log2_trans_size; + + if(1 == ps_pl_tu_ctxt->cbf + && (ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag)) + { + if(ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4 && intra_flag) + return ihevcd_iquant_itrans_resi_recon_tu_plane; + if(ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag + && (intra_pred_mode == 10 || intra_pred_mode == 26)) + return ihevcd_iquant_itrans_resi_recon_tu_plane; + if(ps_pl_tu_ctxt->explicit_rdpcm_flag) + return ihevcd_iquant_itrans_resi_recon_tu_plane; + } + if(ps_pps->i1_cross_component_prediction_enabled_flag) + { + if((chroma_plane == NULL_PLANE + && (ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0 + || ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0)) + || (chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0) + || (chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0)) + return ihevcd_iquant_itrans_resi_recon_tu_plane; + } + return ihevcd_iquant_itrans_recon_tu_plane; +} +#endif + WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) { WORD16 *pi2_scaling_mat; @@ -539,40 +829,48 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) WORD16 *pi2_ctb_coeff; WORD32 tu_cnt; WORD16 *pi2_tu_coeff; - WORD16 *pi2_tmp; WORD32 pic_strd; WORD32 luma_nbr_flags; WORD32 luma_nbr_flags_4x4[4] = { 0 }; WORD32 chroma_nbr_flags = 0; + WORD32 chroma_nbr_flags_subtu = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD32 disable_boundary_filter = 0; +#endif UWORD8 u1_luma_pred_mode_first_tu = 0; /* Pointers for generating 2d coeffs from coeff-map */ UWORD8 *pu1_tu_coeff_data; /* nbr avail map for CTB */ /* 1st bit points to neighbor (left/top_left/bot_left) */ /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */ - UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE - + 2 /* Top nbr + bot nbr */]; UWORD32 - top_avail_bits; + UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE + 2 /* Top nbr + bot nbr */]; + UWORD32 
top_avail_bits; sps_t *ps_sps; pps_t *ps_pps; WORD32 intra_flag; UWORD8 *pu1_pic_intra_flag; + WORD32 h_samp_factor, v_samp_factor; + WORD32 chroma_pixel_strd = 2; + PF_IQITRECON_PLANE iqitrecon_fptr = ihevcd_iquant_itrans_recon_tu_plane; + /*************************************************************************/ /* Contanis scaling matrix offset in the following order in a 1D buffer */ + /* Entries that are listed as UNUSED are invalid combinations where */ + /* scaling matrix is not used. eg: 64x64 SKIP CU, 64x64 PCM CU */ /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */ /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */ /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */ /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */ /* Intra 16x16 Y, 16x16 U, 16x16 V */ /* Inter 16x16 Y, 16x16 U, 16x16 V */ - /* Intra 32x32 Y */ - /* Inter 32x32 Y */ + /* Intra 32x32 Y, 32x32 U, 32x32 V */ + /* Inter 32x32 Y, 32x32 U, 32x32 V */ + /* UNUSED, UNUSED, UNUSED */ + /* UNUSED, UNUSED, UNUSED */ /*************************************************************************/ - /* Only first 20 entries are used. Array is extended to avoid out of bound - reads. Skip CUs (64x64) read this table, but don't really use the value */ static const WORD32 scaling_mat_offset[] = { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, - 1248, 1504, 1760, 2016, 3040, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + 1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136, 0, 0, 0, 0, 0, 0}; PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED(); @@ -591,8 +889,6 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) pic_strd = ps_codec->i4_strd; - pi2_tmp = ps_proc->pi2_itrans_intrmd_buf; - pi2_tu_coeff = pi2_ctb_coeff; ps_tu = ps_proc->ps_tu; @@ -653,35 +949,40 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) } + h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; + v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 
2 : 1; + /* Applying Inverse transform on all the TU's in CTB */ for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++) { - WORD32 transform_skip_flag = 0; - WORD32 transform_skip_flag_v = 0; + tu_plane_iq_it_recon_ctxt_t y_cb_tu = { 0 }; + tu_plane_iq_it_recon_ctxt_t cr_tu = { 0 }; + tu_plane_iq_it_recon_ctxt_t *ps_cb_tu = &y_cb_tu; + tu_plane_iq_it_recon_ctxt_t *ps_cr_tu = &cr_tu; +#ifdef ENABLE_MAIN_REXT_PROFILE + tu_plane_iq_it_recon_ctxt_t cb_sub_tu = { 0 }; + tu_plane_iq_it_recon_ctxt_t cr_sub_tu = { 0 }; +#endif + WORD32 num_comp, c_idx, func_idx; - WORD32 src_strd, pred_strd, dst_strd; + WORD32 qp_div = 0, qp_rem = 0; WORD32 qp_div_v = 0, qp_rem_v = 0; - UWORD32 zero_cols = 0, zero_cols_v = 0; - UWORD32 zero_rows = 0, zero_rows_v = 0; - UWORD32 coeff_type = 0, coeff_type_v = 0; - WORD16 i2_coeff_value, i2_coeff_value_v; + WORD32 chroma_qp_idx; + WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset; + WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL; + WORD32 trans_size = 0; TRANSFORM_TYPE e_trans_type; WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2; WORD32 log2_trans_size; - WORD32 chroma_qp_idx; - WORD16 *pi2_src = NULL, *pi2_src_v = NULL; - UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL; - UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL; - WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL; + WORD32 tu_x, tu_y; WORD32 tu_y_offset, tu_uv_offset; - WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset; - UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode; + UWORD8 u1_luma_pred_mode, u1_chroma_pred_mode; WORD32 offset; WORD32 pcm_flag; - WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); + WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); /* If 420SP_VU is chroma format, pred and dst pointer */ /* will be added +1 to point to U */ WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu; @@ -709,12 +1010,11 @@ WORD32 
ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) u1_luma_pred_mode = ps_tu->b6_luma_intra_mode; u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx; - if(u1_chroma_pred_mode != 7) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && u1_chroma_pred_mode != 7) num_comp = 2; /* Y and UV */ else num_comp = 1; /* Y */ - pcm_flag = 0; if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE)) @@ -733,7 +1033,6 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) tu_y_offset = tu_x + tu_y * pic_strd; pu1_y_dst += tu_x + tu_y * pic_strd; - pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd; /* First byte points to number of coded blocks */ pu1_tu_coeff_data++; @@ -753,25 +1052,32 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) pu1_buf += cb_size; } - pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset; - - /* U */ - for(i = 0; i < cb_size / 2; i++) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { - for(j = 0; j < cb_size / 2; j++) + WORD32 chroma_strd = (pic_strd * chroma_pixel_strd) / h_samp_factor; + + pu1_uv_dst += (tu_x * chroma_pixel_strd / h_samp_factor) + + (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor)); + pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset; + + /* U */ + for(i = 0; i < cb_size / v_samp_factor; i++) { - pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++; + for(j = 0; j < cb_size / h_samp_factor; j++) + { + pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++; + } } - } - pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset; + pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset; - /* V */ - for(i = 0; i < cb_size / 2; i++) - { - for(j = 0; j < cb_size / 2; j++) + /* V */ + for(i = 0; i < cb_size / v_samp_factor; i++) { - pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++; + for(j = 0; j < cb_size / h_samp_factor; j++) + { + pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++; + } } } } @@ -788,32 +1094,20 @@ WORD32 
ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) { if(0 == pcm_flag) { - /* Initializing variables */ - pred_strd = pic_strd; - dst_strd = pic_strd; if(c_idx == 0) /* Y */ { + /* Initializing variables */ + log2_y_trans_size_minus_2 = ps_tu->b3_size; trans_size = 1 << (log2_y_trans_size_minus_2 + 2); log2_trans_size = log2_y_trans_size_minus_2 + 2; tu_y_offset = tu_x + tu_y * pic_strd; - pi2_src = pi2_tu_coeff; - pu1_pred = pu1_y_dst_ctb + tu_y_offset; - pu1_dst = pu1_y_dst_ctb + tu_y_offset; - /* Calculating scaling matrix offset */ - offset = log2_y_trans_size_minus_2 * 6 - + (!intra_flag) - * ((log2_y_trans_size_minus_2 - == 3) ? 1 : 3) - + c_idx; - pi2_dequant_matrix = pi2_scaling_mat - + scaling_mat_offset[offset]; - - src_strd = trans_size; + offset = log2_y_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx; + pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset]; /* 4x4 transform Luma in INTRA mode is DST */ if(log2_y_trans_size_minus_2 == 0 && intra_flag) @@ -830,45 +1124,69 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) qp_div = ps_tu->b7_qp / 6; qp_rem = ps_tu->b7_qp % 6; - u1_cbf = ps_tu->b1_y_cbf; - - transform_skip_flag = pu1_tu_coeff_data[1] & 1; + y_cb_tu.pi2_tu_coeff = pi2_tu_coeff; + y_cb_tu.pu1_pred = pu1_y_dst_ctb + tu_y_offset; + y_cb_tu.pu1_dst = pu1_y_dst_ctb + tu_y_offset; + y_cb_tu.tu_coeff_stride = trans_size; + y_cb_tu.pred_strd = pic_strd; + y_cb_tu.dst_strd = pic_strd; + y_cb_tu.cbf = ps_tu->b1_y_cbf; + y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; +#ifdef ENABLE_MAIN_REXT_PROFILE + y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; + y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; +#endif /* Unpacking coeffs */ - if(1 == u1_cbf) + if(1 == y_cb_tu.cbf) { pu1_tu_coeff_data = ihevcd_unpack_coeffs( - pi2_src, log2_y_trans_size_minus_2 + 2, + y_cb_tu.pi2_tu_coeff, log2_y_trans_size_minus_2 + 2, pu1_tu_coeff_data, pi2_dequant_matrix, qp_rem, qp_div, e_trans_type, - 
ps_tu->b1_transquant_bypass, &zero_cols, - &zero_rows, &coeff_type, - &i2_coeff_value); + ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols, + &y_cb_tu.zero_rows, &y_cb_tu.coeff_type, + &y_cb_tu.coeff_value); } } else /* UV interleaved */ { + /* Initializing variables */ + const WORD16 *pi2_ihevcd_chroma_qp = + CHROMA_FMT_IDC_YUV420 != ps_sps->i1_chroma_format_idc ? + gai2_ihevcd_chroma_qp_clip : + gai2_ihevcd_chroma_qp_420; + /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */ if(ps_tu->b3_size == 0) { - /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma postion has to be (luma pos x- 4,luma pos y- 4) */ log2_uv_trans_size_minus_2 = ps_tu->b3_size; - tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd; + if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) + { + tu_uv_offset = (tu_x * chroma_pixel_strd) + + (tu_y * chroma_pixel_strd * pic_strd); + } + else + { + /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma postion has to be (luma pos x - 4, luma pos y - 4) */ + tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / v_samp_factor) * pic_strd; + } } else { - log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1; - tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd; + if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) + { + log2_uv_trans_size_minus_2 = ps_tu->b3_size; + } + else + { + log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1; + } + tu_uv_offset = (tu_x * chroma_pixel_strd / h_samp_factor) + + (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor)); } trans_size = 1 << (log2_uv_trans_size_minus_2 + 2); log2_trans_size = log2_uv_trans_size_minus_2 + 2; - pi2_src = pi2_tu_coeff; - pi2_src_v = pi2_tu_coeff + trans_size * trans_size; - pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ - pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ - pu1_dst = pu1_uv_dst_ctb + 
tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ - pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ - /*TODO: Add support for choosing different tables for U and V, * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes */ @@ -876,68 +1194,136 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since * max uv trans size is 16x16 */ - offset = log2_uv_trans_size_minus_2 * 6 - + (!intra_flag) * 3 + c_idx; - pi2_dequant_matrix = pi2_scaling_mat - + scaling_mat_offset[offset]; - pi2_dequant_matrix_v = pi2_scaling_mat - + scaling_mat_offset[offset + 1]; - - src_strd = trans_size; + offset = log2_uv_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx; + pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset]; + pi2_dequant_matrix_v = pi2_scaling_mat + scaling_mat_offset[offset + 1]; func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/ /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma. - * By limiting func_idx to 7, max of 16x16 chroma is called */ - func_idx = MIN(func_idx, 7); + * Limit func_idx based on allowed max chroma tu size */ + func_idx = MIN(func_idx, (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 
8 : 7); e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1); /* QP for U */ i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset; i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset; - u1_cbf = ps_tu->b1_cb_cbf; - chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset - + i1_chroma_slice_qp_offset; + chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset; chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); - qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6; - qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6; + qp_div = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6; + qp_rem = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6; /* QP for V */ i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset; i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset; - u1_cbf_v = ps_tu->b1_cr_cbf; - chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset - + i1_chroma_slice_qp_offset; + chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset; chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57); - qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6; - qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6; + qp_div_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6; + qp_rem_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6; + + y_cb_tu.pi2_tu_coeff = pi2_tu_coeff; + y_cb_tu.pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ + y_cb_tu.pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/ + y_cb_tu.tu_coeff_stride = trans_size; + y_cb_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + y_cb_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + y_cb_tu.cbf = ps_tu->b1_cb_cbf; + + cr_tu.pi2_tu_coeff = pi2_tu_coeff + trans_size * trans_size; + cr_tu.pu1_pred = y_cb_tu.pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/ + cr_tu.pu1_dst = y_cb_tu.pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing 
to start byte of V*/ + cr_tu.tu_coeff_stride = trans_size; + cr_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + cr_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + cr_tu.cbf = ps_tu->b1_cr_cbf; /* Unpacking coeffs */ - transform_skip_flag = pu1_tu_coeff_data[1] & 1; - if(1 == u1_cbf) + y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; +#ifdef ENABLE_MAIN_REXT_PROFILE + y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; + y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; +#endif + if(1 == y_cb_tu.cbf) { pu1_tu_coeff_data = ihevcd_unpack_coeffs( - pi2_src, log2_uv_trans_size_minus_2 + 2, + y_cb_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, pu1_tu_coeff_data, pi2_dequant_matrix, qp_rem, qp_div, e_trans_type, - ps_tu->b1_transquant_bypass, &zero_cols, - &zero_rows, &coeff_type, - &i2_coeff_value); + ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols, + &y_cb_tu.zero_rows, &y_cb_tu.coeff_type, + &y_cb_tu.coeff_value); } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + cb_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu; + cb_sub_tu.pu1_pred = y_cb_tu.pu1_pred + trans_size * y_cb_tu.pred_strd; + cb_sub_tu.pu1_dst = y_cb_tu.pu1_dst + trans_size * y_cb_tu.dst_strd; + cb_sub_tu.tu_coeff_stride = trans_size; + cb_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + cb_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + cb_sub_tu.cbf = ps_tu->b1_cb_cbf_subtu1; + cb_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; + cb_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; + cb_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; + if(1 == cb_sub_tu.cbf) + { + pu1_tu_coeff_data = ihevcd_unpack_coeffs( + cb_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, + pu1_tu_coeff_data, pi2_dequant_matrix, + qp_rem, qp_div, e_trans_type, + ps_tu->b1_transquant_bypass, &cb_sub_tu.zero_cols, + &cb_sub_tu.zero_rows, 
&cb_sub_tu.coeff_type, + &cb_sub_tu.coeff_value); + } + } +#endif - transform_skip_flag_v = pu1_tu_coeff_data[1] & 1; - if(1 == u1_cbf_v) + cr_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; +#ifdef ENABLE_MAIN_REXT_PROFILE + cr_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; + cr_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; +#endif + if(1 == cr_tu.cbf) { pu1_tu_coeff_data = ihevcd_unpack_coeffs( - pi2_src_v, log2_uv_trans_size_minus_2 + 2, + cr_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, pu1_tu_coeff_data, pi2_dequant_matrix_v, qp_rem_v, qp_div_v, e_trans_type, - ps_tu->b1_transquant_bypass, &zero_cols_v, - &zero_rows_v, &coeff_type_v, &i2_coeff_value_v); + ps_tu->b1_transquant_bypass, &cr_tu.zero_cols, + &cr_tu.zero_rows, &cr_tu.coeff_type, &cr_tu.coeff_value); } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + cr_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu + trans_size * trans_size; + cr_sub_tu.pu1_pred = cr_tu.pu1_pred + trans_size * cr_tu.pred_strd; + cr_sub_tu.pu1_dst = cr_tu.pu1_dst + trans_size * cr_tu.dst_strd; + cr_sub_tu.tu_coeff_stride = trans_size; + cr_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + cr_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor; + cr_sub_tu.cbf = ps_tu->b1_cr_cbf_subtu1; + cr_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1; + cr_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1; + cr_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1; + if(1 == cr_sub_tu.cbf) + { + pu1_tu_coeff_data = ihevcd_unpack_coeffs( + cr_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2, + pu1_tu_coeff_data, pi2_dequant_matrix_v, + qp_rem_v, qp_div_v, e_trans_type, + ps_tu->b1_transquant_bypass, &cr_sub_tu.zero_cols, + &cr_sub_tu.zero_rows, &cr_sub_tu.coeff_type, + &cr_sub_tu.coeff_value); + } + } +#endif } + WORD8 subtu_idx = 0; + do + { /***************************************************************/ 
/****************** Intra Prediction **************************/ /***************************************************************/ @@ -948,7 +1334,7 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) so that SIMD functions can load 64 bits. Also some SIMD modules read few bytes before the start of the array, so allocate 16 extra bytes at the start */ - UWORD8 au1_ref_sub_out[16 + (MAX_TU_SIZE * 2 * 2) + 8] = {0}; + UWORD8 au1_ref_sub_out[16 + (MAX_TU_SIZE * 2 * 2 * 2) + 8] = {0}; UWORD8 *pu1_ref_sub_out = &au1_ref_sub_out[16]; UWORD8 *pu1_top_left, *pu1_top, *pu1_left; WORD32 luma_pred_func_idx, chroma_pred_func_idx; @@ -969,43 +1355,95 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) if(trans_size == 4) luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags; - if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0)) + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) { chroma_nbr_flags = luma_nbr_flags; } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + WORD32 bot_left, left, top, tp_right, tp_left; + tp_left = (luma_nbr_flags & 0x10000); + tp_right = (luma_nbr_flags & 0x0f000); + top = (luma_nbr_flags & 0x00f00); + left = (luma_nbr_flags & 0x000f0); + bot_left = (luma_nbr_flags & 0x0000f); + chroma_nbr_flags = tp_left | tp_right | top | left | (left >> 4); + chroma_nbr_flags_subtu = ((left != 0 ? 
1 : 0) << 16) | (0xf << 8) + | left | bot_left; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) + { + if(((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))) + chroma_nbr_flags = luma_nbr_flags; + } /* Initializing nbr pointers */ - pu1_top = pu1_pred - pic_strd; - pu1_left = pu1_pred - 1; - pu1_top_left = pu1_pred - pic_strd - 1; + pu1_top = y_cb_tu.pu1_pred - pic_strd; + pu1_left = y_cb_tu.pu1_pred - 1; + pu1_top_left = y_cb_tu.pu1_pred - pic_strd - 1; /* call reference array substitution */ if(luma_nbr_flags == 0x1ffff) ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr( pu1_top_left, - pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1); + pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1); else ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr( pu1_top_left, - pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1); + pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1); /* call reference filtering */ ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr( - pu1_ref_sub_out, trans_size, pu1_ref_sub_out, - u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag); + trans_size, + pu1_ref_sub_out, + u1_luma_pred_mode, +#ifdef ENABLE_MAIN_REXT_PROFILE + (ps_sps->i1_intra_smoothing_disabled_flag << 3 + | ps_sps->i1_strong_intra_smoothing_enable_flag) +#else + ps_sps->i1_strong_intra_smoothing_enable_flag +#endif + ); /* use the look up to get the function idx */ luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode]; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_implicit_rdpcm_enabled_flag && ps_tu->b1_transquant_bypass + && (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26)) + disable_boundary_filter = 1; +#endif /* call the intra prediction function */ - ps_codec->apf_intra_pred_luma[luma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, 
u1_luma_pred_mode); + ps_codec->apf_intra_pred_luma[luma_pred_func_idx]( + pu1_ref_sub_out, 1, + y_cb_tu.pu1_pred, + y_cb_tu.pred_strd, + trans_size, +#ifdef ENABLE_MAIN_REXT_PROFILE + (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26) ? + disable_boundary_filter : + u1_luma_pred_mode +#else + u1_luma_pred_mode +#endif + ); } else { + +#ifdef ENABLE_MAIN_REXT_PROFILE + if(subtu_idx != 0) + { + ps_cb_tu = &cb_sub_tu; + ps_cr_tu = &cr_sub_tu; + chroma_nbr_flags = chroma_nbr_flags_subtu; + } +#endif + /* In case of yuv420sp_vu, prediction happens as usual. */ /* So point the pu1_pred pointer to original prediction pointer */ - UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset; + UWORD8 *pu1_pred_orig = ps_cb_tu->pu1_pred - chroma_yuv420sp_vu_u_offset; /* Top-Left | Top-Right | Top | Left | Bottom-Left * 1 4 4 4 4 @@ -1013,25 +1451,52 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) * Generating chroma_nbr_flags depending upon the transform size */ if(ps_tu->b3_size == 0) { - /* Take TL,T,L flags of First luma 4x4 block */ - chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); - /* Take TR flags of Second luma 4x4 block */ - chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000); - /* Take BL flags of Third luma 4x4 block */ - chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) + { + /* Take TL,T,L flags of First luma 4x4 block */ + chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); + /* Take TR flags of Second luma 4x4 block */ + chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000); + /* Take BL flags of Third luma 4x4 block */ + chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + if(subtu_idx == 0) + { + /* Take TL,T,L flags of First luma 4x4 block */ + chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0); + /* Take TR flags of Second luma 4x4 block */ + chroma_nbr_flags |= 
(luma_nbr_flags_4x4[1] & 0x0F000); + /* Take BL flags of first luma 4x4 block */ + chroma_nbr_flags |= (luma_nbr_flags_4x4[0] & 0x0000F); + } + else + { + /* Take TL,T,L flags of Third luma 4x4 block */ + chroma_nbr_flags = (luma_nbr_flags_4x4[2] & 0x10FF0); + /* Take BL flags of Third luma 4x4 block */ + chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F); + } + } } /* Initializing nbr pointers */ - pu1_top = pu1_pred_orig - pic_strd; + pu1_top = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor); pu1_left = pu1_pred_orig - 2; - pu1_top_left = pu1_pred_orig - pic_strd - 2; + pu1_top_left = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor) - 2; + if(subtu_idx == 0) + { /* Chroma pred mode derivation from luma pred mode */ { tu_t *ps_tu_tmp = ps_tu; - while(!ps_tu_tmp->b1_first_tu_in_cu) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV444) { - ps_tu_tmp--; + while(!ps_tu_tmp->b1_first_tu_in_cu) + { + ps_tu_tmp--; + } } u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode; } @@ -1041,107 +1506,73 @@ WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc) { u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode]; - if(u1_chroma_pred_mode == - u1_luma_pred_mode_first_tu) + if(u1_chroma_pred_mode == u1_luma_pred_mode_first_tu) { u1_chroma_pred_mode = INTRA_ANGULAR(34); } } + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + u1_chroma_pred_mode = gau1_intra_pred_chroma_modes_422[u1_chroma_pred_mode]; + } + } /* call the chroma reference array substitution */ ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr( pu1_top_left, - pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, pu1_ref_sub_out, 1); + pu1_top, pu1_left, + ps_cb_tu->pred_strd, + trans_size, chroma_nbr_flags, pu1_ref_sub_out, 1, + ps_sps->i1_chroma_format_idc); + +#ifdef ENABLE_MAIN_REXT_PROFILE + /* call reference filtering */ + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + 
ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_filtering_fptr( + pu1_ref_sub_out, + trans_size, + pu1_ref_sub_out, + u1_chroma_pred_mode, + (ps_sps->i1_intra_smoothing_disabled_flag << 3 + | ps_sps->i1_strong_intra_smoothing_enable_flag)); + } +#endif /* use the look up to get the function idx */ - chroma_pred_func_idx = - g_i4_ip_funcs[u1_chroma_pred_mode]; + chroma_pred_func_idx = g_i4_ip_funcs[u1_chroma_pred_mode]; /* call the intra prediction function */ - ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode); + ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred_orig, ps_cb_tu->pred_strd, trans_size, u1_chroma_pred_mode); } } /* Updating number of transform types */ STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx); +#ifdef ENABLE_MAIN_REXT_PROFILE + iqitrecon_fptr = get_iqitrec_func( + ps_proc, ps_tu, ps_cb_tu, log2_trans_size, + c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag, + c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode); +#endif /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */ - if(1 == u1_cbf) - { - if(ps_tu->b1_transquant_bypass || transform_skip_flag) - { - /* Recon */ - ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst, - src_strd, pred_strd, dst_strd, - zero_cols); - } - else - { - - /* Updating coded number of transform types(excluding trans skip and trans quant skip) */ - STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0); - - /* iQuant , iTrans and Recon */ - if((0 == coeff_type)) - { - ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp, - pu1_pred, pu1_dst, - src_strd, pred_strd, - dst_strd, zero_cols, - zero_rows); - } - else /* DC only */ - { - STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1); - ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst, - pred_strd, dst_strd, - log2_trans_size, - i2_coeff_value); - } - } - } + iqitrecon_fptr(ps_proc, ps_tu, ps_cb_tu, func_idx, log2_trans_size, + c_idx != 0 ? 
U_PLANE : NULL_PLANE, intra_flag, + c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode); /* IQ, IT and Recon for V */ if(c_idx != 0) { - if(1 == u1_cbf_v) - { - if(ps_tu->b1_transquant_bypass || transform_skip_flag_v) - { - /* Recon */ - ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v, - pu1_dst_v, src_strd, - pred_strd, dst_strd, - zero_cols_v); - } - else - { - /* Updating number of transform types */ - STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0); - - /* iQuant , iTrans and Recon */ - if((0 == coeff_type_v)) - { - ps_codec->apf_itrans_recon[func_idx](pi2_src_v, - pi2_tmp, - pu1_pred_v, - pu1_dst_v, - src_strd, - pred_strd, - dst_strd, - zero_cols_v, - zero_rows_v); - } - else /* DC only */ - { - STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1); - ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v, - pred_strd, dst_strd, - log2_trans_size, - i2_coeff_value_v); - } - } - } +#ifdef ENABLE_MAIN_REXT_PROFILE + iqitrecon_fptr = get_iqitrec_func(ps_proc, ps_tu, ps_cr_tu, log2_trans_size, + V_PLANE, intra_flag, u1_chroma_pred_mode); +#endif + iqitrecon_fptr(ps_proc, ps_tu, ps_cr_tu, func_idx, log2_trans_size, V_PLANE, + intra_flag, u1_chroma_pred_mode); } + } + while(c_idx != 0 && ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 + && ++subtu_idx < 2); } /* Neighbor availability inside CTB */ diff --git a/decoder/ihevcd_ittiam_logo.h b/decoder/ihevcd_ittiam_logo.h index 71540e3..8c8d7a6 100644 --- a/decoder/ihevcd_ittiam_logo.h +++ b/decoder/ihevcd_ittiam_logo.h @@ -44,12 +44,6 @@ #define LOGO_WD_Y LOGO_WD #define LOGO_HT_Y LOGO_HT -#define LOGO_WD_RGBA8888 160 -#define LOGO_HT_RGBA8888 64 - -#define LOGO_WD_RGB565 160 -#define LOGO_HT_RGB565 64 - #define LOGO_WD_444_UV LOGO_WD #define LOGO_HT_444_UV LOGO_HT @@ -63,15 +57,6 @@ #define LOGO_WD_420SP_VU (LOGO_WD) #define LOGO_HT_420SP_VU (LOGO_HT >> 1) -#define LOGO_WD_422_UV (LOGO_WD >> 1) -#define LOGO_HT_422_UV (LOGO_HT) - -#define LOGO_WD_422V_UV (LOGO_WD) -#define LOGO_HT_422V_UV (LOGO_HT >> 1) 
- -#define LOGO_WD_411_UV (LOGO_WD >> 2) -#define LOGO_HT_411_UV (LOGO_HT) - #define LOGO_CODEC_WD 80 #define LOGO_CODEC_HT 24 @@ -92,18 +77,6 @@ #define LOGO_CODEC_WD_420SP_VU (LOGO_CODEC_WD) #define LOGO_CODEC_HT_420SP_VU (LOGO_CODEC_HT >> 1) -#define LOGO_CODEC_WD_422_UV (LOGO_CODEC_WD >> 1) -#define LOGO_CODEC_HT_422_UV (LOGO_CODEC_HT) - -#define LOGO_CODEC_WD_422V_UV (LOGO_CODEC_WD) -#define LOGO_CODEC_HT_422V_UV (LOGO_CODEC_HT >> 1) - -#define LOGO_CODEC_WD_411_UV (LOGO_CODEC_WD >> 2) -#define LOGO_CODEC_HT_411_UV (LOGO_CODEC_HT) - - - - #define START_X_ITT_LOGO 0 #define START_Y_ITT_LOGO 0 diff --git a/decoder/ihevcd_nal.c b/decoder/ihevcd_nal.c index 18d9a5d..8112f62 100644 --- a/decoder/ihevcd_nal.c +++ b/decoder/ihevcd_nal.c @@ -451,6 +451,7 @@ IHEVCD_ERROR_T ihevcd_nal_unit(codec_t *ps_codec) DEBUG_PRINT_NAL_INFO(ps_codec, s_nal.i1_nal_unit_type); break; +#ifndef DISABLE_SEI case NAL_PREFIX_SEI: case NAL_SUFFIX_SEI: if(IVD_DECODE_HEADER == ps_codec->i4_header_mode) @@ -461,6 +462,7 @@ IHEVCD_ERROR_T ihevcd_nal_unit(codec_t *ps_codec) ret = ihevcd_parse_sei(ps_codec, &s_nal); break; +#endif case NAL_EOS : ps_codec->i4_cra_as_first_pic = 1; break; diff --git a/decoder/ihevcd_parse_headers.c b/decoder/ihevcd_parse_headers.c index 0ce1b27..9d66e58 100644 --- a/decoder/ihevcd_parse_headers.c +++ b/decoder/ihevcd_parse_headers.c @@ -75,11 +75,12 @@ #define COPY_DEFAULT_SCALING_LIST(pi2_scaling_mat) \ { \ - WORD32 scaling_mat_offset[]={0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, 1248, 1504, 1760, 2016, 3040}; \ + WORD32 scaling_mat_offset[] = {0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, \ + 480, 736, 992, 1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136}; \ \ /* scaling matrix for 4x4 */ \ memcpy(pi2_scaling_mat, gi2_flat_scale_mat_32x32, 6*16*sizeof(WORD16)); \ -/* scaling matrix for 8x8 */ \ + /* scaling matrix for 8x8 */ \ memcpy(pi2_scaling_mat + scaling_mat_offset[6], gi2_intra_default_scale_mat_8x8, 64*sizeof(WORD16)); \ 
memcpy(pi2_scaling_mat + scaling_mat_offset[7], gi2_intra_default_scale_mat_8x8, 64*sizeof(WORD16)); \ memcpy(pi2_scaling_mat + scaling_mat_offset[8], gi2_intra_default_scale_mat_8x8, 64*sizeof(WORD16)); \ @@ -95,12 +96,17 @@ memcpy(pi2_scaling_mat + scaling_mat_offset[17], gi2_inter_default_scale_mat_16x16, 256*sizeof(WORD16)); \ /* scaling matrix for 32x32 */ \ memcpy(pi2_scaling_mat + scaling_mat_offset[18], gi2_intra_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ - memcpy(pi2_scaling_mat + scaling_mat_offset[19], gi2_inter_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ + memcpy(pi2_scaling_mat + scaling_mat_offset[19], gi2_intra_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ + memcpy(pi2_scaling_mat + scaling_mat_offset[20], gi2_intra_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ + memcpy(pi2_scaling_mat + scaling_mat_offset[21], gi2_inter_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ + memcpy(pi2_scaling_mat + scaling_mat_offset[22], gi2_inter_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ + memcpy(pi2_scaling_mat + scaling_mat_offset[23], gi2_inter_default_scale_mat_32x32, 1024*sizeof(WORD16)); \ } #define COPY_FLAT_SCALING_LIST(pi2_scaling_mat) \ { \ - WORD32 scaling_mat_offset[]={0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, 1248, 1504, 1760, 2016, 3040}; \ + WORD32 scaling_mat_offset[] = {0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, \ + 480, 736, 992, 1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136}; \ \ /* scaling matrix for 4x4 */ \ memcpy(pi2_scaling_mat, gi2_flat_scale_mat_32x32, 6*16*sizeof(WORD16)); \ @@ -110,8 +116,10 @@ memcpy(pi2_scaling_mat + scaling_mat_offset[12], gi2_flat_scale_mat_32x32, 3*256*sizeof(WORD16)); \ memcpy(pi2_scaling_mat + scaling_mat_offset[15], gi2_flat_scale_mat_32x32, 3*256*sizeof(WORD16)); \ /* scaling matrix for 32x32 */ \ - memcpy(pi2_scaling_mat + scaling_mat_offset[18], gi2_flat_scale_mat_32x32, 1024*sizeof(WORD16)); \ - memcpy(pi2_scaling_mat + scaling_mat_offset[19], 
gi2_flat_scale_mat_32x32, 1024*sizeof(WORD16)); \ + for (WORD32 i = 0; i < 6; i++) \ + { \ + memcpy(pi2_scaling_mat + scaling_mat_offset[18 + i], gi2_flat_scale_mat_32x32, 1024*sizeof(WORD16)); \ + } \ } /* Function declarations */ @@ -165,7 +173,7 @@ WORD32 ihevcd_parse_pred_wt_ofst(bitstrm_t *ps_bitstrm, } ps_wt_ofst->i1_luma_log2_weight_denom = u4_value; - if(ps_sps->i1_chroma_format_idc != 0) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { SEV_PARSE("delta_chroma_log2_weight_denom", value, ps_bitstrm); if((value < -7) || (value > 7)) @@ -189,7 +197,7 @@ WORD32 ihevcd_parse_pred_wt_ofst(bitstrm_t *ps_bitstrm, - if(ps_sps->i1_chroma_format_idc != 0) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l0_active; i++) { @@ -289,7 +297,7 @@ WORD32 ihevcd_parse_pred_wt_ofst(bitstrm_t *ps_bitstrm, ps_wt_ofst->i1_luma_weight_l1_flag[i] = value; } - if(ps_sps->i1_chroma_format_idc != 0) + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) { for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l1_active; i++) { @@ -988,6 +996,22 @@ static WORD32 ihevcd_parse_vui_parameters(bitstrm_t *ps_bitstrm, return ret; } +static WORD32 ihevcd_get_profile(profile_tier_lvl_t *ps_ptl) +{ + WORD32 profile = IHEVC_PROFILE_UNKNOWN; + + if(ps_ptl->i1_profile_idc == 1 || ps_ptl->ai1_profile_compatibility_flag[1] == 1) + profile = IHEVC_PROFILE_MAIN; + else if(ps_ptl->i1_profile_idc == 3 || ps_ptl->ai1_profile_compatibility_flag[3] == 1) + profile = IHEVC_PROFILE_MAIN_STILL; +#ifdef ENABLE_MAIN_REXT_PROFILE + else if(ps_ptl->i1_profile_idc == 4 || ps_ptl->ai1_profile_compatibility_flag[4] == 1) + profile = IHEVC_PROFILE_MAIN_REXT; +#endif + + return profile; +} + /** ******************************************************************************* * @@ -1166,7 +1190,7 @@ static IHEVCD_ERROR_T ihevcd_profile_tier_level(bitstrm_t *ps_bitstrm, * 
******************************************************************************* */ -IHEVCD_ERROR_T ihevcd_scaling_list_data(codec_t *ps_codec, WORD16 *pi2_scaling_mat) +IHEVCD_ERROR_T ihevcd_scaling_list_data(codec_t *ps_codec, WORD16 *pi2_scaling_mat, WORD8 chroma_format_idc) { IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; WORD32 size_id; @@ -1175,15 +1199,16 @@ IHEVCD_ERROR_T ihevcd_scaling_list_data(codec_t *ps_codec, WORD16 *pi2_scaling_ UWORD32 u4_value; WORD32 next_coef; WORD32 coef_num; - WORD32 i, j, offset; + WORD32 i, j, k, offset; bitstrm_t *ps_bitstrm = &ps_codec->s_parse.s_bitstrm; WORD16 *pi2_scaling_mat_offset; - WORD32 scaling_mat_offset[] = { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992, 1248, 1504, 1760, 2016, 3040 }; + WORD32 scaling_mat_offset[] = {0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, + 480, 736, 992, 1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136}; UWORD8 *scan_table; for(size_id = 0; size_id < 4; size_id++) { - for(matrix_id = 0; matrix_id < ((size_id == 3) ? 2 : 6); matrix_id++) + for(matrix_id = 0; matrix_id < 6; matrix_id += (size_id == 3) ? 3 : 1) { WORD32 scaling_list_pred_mode_flag; WORD32 scaling_list_delta_coef; @@ -1197,14 +1222,24 @@ IHEVCD_ERROR_T ihevcd_scaling_list_data(codec_t *ps_codec, WORD16 *pi2_scaling_ WORD32 num_elements; UEV_PARSE("scaling_list_pred_matrix_id_delta", u4_value, ps_bitstrm); - if(u4_value > matrix_id) + if(size_id <= 2) { - return IHEVCD_INVALID_PARAMETER; + if(u4_value > matrix_id) + return IHEVCD_INVALID_PARAMETER; + } + else + { + if(u4_value > matrix_id / 3) + return IHEVCD_INVALID_PARAMETER; } num_elements = (1 << (4 + (size_id << 1))); if(0 != u4_value) - memmove(pi2_scaling_mat_offset, pi2_scaling_mat_offset - u4_value * num_elements, num_elements * sizeof(WORD16)); + { + memmove(pi2_scaling_mat_offset, + pi2_scaling_mat_offset - (u4_value * (size_id == 3 ? 
3 : 1)) * num_elements, + num_elements * sizeof(WORD16)); + } } else { @@ -1294,6 +1329,40 @@ IHEVCD_ERROR_T ihevcd_scaling_list_data(codec_t *ps_codec, WORD16 *pi2_scaling_ } } } + // derive 32x32 CbCr scaling list from 16x16 CbCr scaling list + if(chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + WORD32 matrix_ids[] = { 1, 2, 4, 5 }; + WORD16 *pi2_ref_scaling_mat; + + scan_table = (UWORD8 *)gapv_ihevc_invscan[2]; + for(i = 0; i < 4; i++) + { + matrix_id = matrix_ids[i]; + pi2_ref_scaling_mat = pi2_scaling_mat + scaling_mat_offset[2 * 6 + matrix_id]; + pi2_scaling_mat_offset = pi2_scaling_mat + scaling_mat_offset[3 * 6 + matrix_id]; + + for(j = 0; j < 64; j++) + { + offset = scan_table[j]; + offset = (offset >> 3) * 16 * 2 + (offset & 0x7) * 2; + // the index 0 value may be overwritten by scaling_list_dc_coef. pick its alternative + next_coef = offset == 0 ? pi2_ref_scaling_mat[1] : pi2_ref_scaling_mat[offset]; + + offset = scan_table[j]; + offset = (offset >> 3) * 32 * 4 + (offset & 0x7) * 4; + + for(k = 0; k < 4; k++) + { + pi2_scaling_mat_offset[offset + k * 32] = next_coef; + pi2_scaling_mat_offset[offset + 1 + k * 32] = next_coef; + pi2_scaling_mat_offset[offset + 2 + k * 32] = next_coef; + pi2_scaling_mat_offset[offset + 3 + k * 32] = next_coef; + } + } + pi2_scaling_mat_offset[0] = pi2_ref_scaling_mat[0]; + } + } return ret; } @@ -1506,15 +1575,34 @@ IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec) { return IHEVCD_INVALID_PARAMETER; } - ps_sps->i1_chroma_format_idc = value; - if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV420) + WORD32 profile = ihevcd_get_profile(&ps_sps->s_ptl.s_ptl_gen); + if(profile == IHEVC_PROFILE_UNKNOWN) { - ps_codec->s_parse.i4_error_code = IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC; - return (IHEVCD_ERROR_T)IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC; + return IHEVCD_UNSUPPORTED_TOOL_SET; } - if(CHROMA_FMT_IDC_YUV444_PLANES == ps_sps->i1_chroma_format_idc) + switch(value) { + case CHROMA_FMT_IDC_MONOCHROME: { + if 
(!(ps_codec->u4_enable_yuv_formats & (1 << CHROMA_FMT_IDC_MONOCHROME))) { + ps_codec->s_parse.i4_error_code = IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC; + return (IHEVCD_ERROR_T)IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC; + } + if(profile != IHEVC_PROFILE_MAIN_REXT) + return IHEVCD_INVALID_PARAMETER; + } + break; + case CHROMA_FMT_IDC_YUV420: + break; + default: { + ps_codec->s_parse.i4_error_code = IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC; + return (IHEVCD_ERROR_T)IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC; + } + break; + } + ps_sps->i1_chroma_format_idc = value; + + if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) { BITS_PARSE("separate_colour_plane_flag", value, ps_bitstrm, 1); ps_sps->i1_separate_colour_plane_flag = value; @@ -1749,7 +1837,7 @@ IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec) ps_sps->i1_sps_scaling_list_data_present_flag = value; if(ps_sps->i1_sps_scaling_list_data_present_flag) - ihevcd_scaling_list_data(ps_codec, ps_sps->pi2_scaling_mat); + ihevcd_scaling_list_data(ps_codec, ps_sps->pi2_scaling_mat, ps_sps->i1_chroma_format_idc); } else { @@ -1844,9 +1932,107 @@ IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec) &ps_sps->s_vui_parameters, ps_sps->i1_sps_max_sub_layers - 1); RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + + if (0 != ps_codec->u4_allocate_dynamic_done) { + + vui_t *ps_vui = &ps_sps->s_vui_parameters; + sps_t *ps_sps_old = ps_codec->s_parse.ps_sps; + vui_t *ps_vui_old = &ps_sps_old->s_vui_parameters; + + if (ps_vui->u1_video_full_range_flag != ps_vui_old->u1_video_full_range_flag || + ps_vui->u1_colour_primaries != ps_vui_old->u1_colour_primaries || + ps_vui->u1_transfer_characteristics != ps_vui_old->u1_transfer_characteristics || + ps_vui->u1_matrix_coefficients != ps_vui_old->u1_matrix_coefficients) { + ps_codec->i4_reset_flag = 1; + return (IHEVCD_ERROR_T)IVD_RES_CHANGED; + } + } } - BITS_PARSE("sps_extension_flag", value, ps_bitstrm, 1); + BITS_PARSE("sps_extension_present_flag", value, ps_bitstrm, 1); + 
ps_sps->i1_sps_extension_present_flag = value; + + if(ps_sps->i1_sps_extension_present_flag) + { + BITS_PARSE("sps_range_extension_flag", value, ps_bitstrm, 1); + ps_sps->i1_sps_range_extension_flag = value; + + BITS_PARSE("sps_multilayer_extension_flag", value, ps_bitstrm, 1); + ps_sps->i1_sps_multilayer_extension_flag = value; + + BITS_PARSE("sps_3d_extension_flag", value, ps_bitstrm, 1); + ps_sps->i1_sps_3d_extension_flag = value; + + BITS_PARSE("sps_scc_extension_flag", value, ps_bitstrm, 1); + ps_sps->i1_sps_scc_extension_flag = value; + + BITS_PARSE("sps_extension_4bits", value, ps_bitstrm, 4); + ps_sps->i1_sps_extension_4bits = value; + } + +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_sps_range_extension_flag) + { + /* ITU-T H.265 Section 7.3.2.2.2 Range extension sequence parameter set syntax */ + BITS_PARSE("transform_skip_rotation_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_transform_skip_rotation_enabled_flag = value; + + BITS_PARSE("transform_skip_context_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_transform_skip_context_enabled_flag = value; + + BITS_PARSE("implicit_rdpcm_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_implicit_rdpcm_enabled_flag = value; + + BITS_PARSE("explicit_rdpcm_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_explicit_rdpcm_enabled_flag = value; + + BITS_PARSE("extended_precision_processing_flag", value, ps_bitstrm, 1); + ps_sps->i1_extended_precision_processing_flag = value; + + BITS_PARSE("intra_smoothing_disabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_intra_smoothing_disabled_flag = value; + + BITS_PARSE("high_precision_offsets_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_use_high_precision_pred_wt = value; + + BITS_PARSE("persistent_rice_adaptation_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_persistent_rice_adaptation_enabled_flag = value; + + BITS_PARSE("cabac_bypass_alignment_enabled_flag", value, ps_bitstrm, 1); + ps_sps->i1_align_cabac_before_bypass = value; + } + if(profile != 
IHEVC_PROFILE_MAIN_REXT) + { + if(ps_sps->i1_transform_skip_rotation_enabled_flag + || ps_sps->i1_transform_skip_context_enabled_flag + || ps_sps->i1_implicit_rdpcm_enabled_flag + || ps_sps->i1_explicit_rdpcm_enabled_flag + || ps_sps->i1_extended_precision_processing_flag + || ps_sps->i1_intra_smoothing_disabled_flag + || ps_sps->i1_persistent_rice_adaptation_enabled_flag + || ps_sps->i1_align_cabac_before_bypass) + { + return IHEVCD_INVALID_PARAMETER; + } + } + if(ps_sps->i1_extended_precision_processing_flag || ps_sps->i1_align_cabac_before_bypass) + { + // main-rext 8-bit profiles require these fields to be off + return IHEVCD_INVALID_PARAMETER; + } + if(ps_sps->i1_sps_multilayer_extension_flag || ps_sps->i1_sps_3d_extension_flag + || ps_sps->i1_sps_scc_extension_flag) + { + return IHEVCD_UNSUPPORTED_TOOL_SET; + } +#else + if(ps_sps->i1_sps_range_extension_flag || ps_sps->i1_sps_multilayer_extension_flag + || ps_sps->i1_sps_3d_extension_flag || ps_sps->i1_sps_scc_extension_flag) + { + // TODO: add support for parsing these syntax elements + return IHEVCD_UNSUPPORTED_TOOL_SET; + } +#endif if((UWORD8 *)ps_bitstrm->pu4_buf > ps_bitstrm->pu1_buf_max) { @@ -1893,13 +2079,33 @@ IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec) return (IHEVCD_ERROR_T)IVD_RES_CHANGED; } + // Ensure both i2_pic_width_in_luma_samples and i2_pic_height_in_luma_samples do + // not exceed MAX_WD and their product doesn't exceed MAX_WD * MAX_HT if((ps_sps->i2_pic_width_in_luma_samples > MAX_WD) || + (ps_sps->i2_pic_height_in_luma_samples > MAX_WD) || ((ps_sps->i2_pic_width_in_luma_samples * ps_sps->i2_pic_height_in_luma_samples) > (MAX_WD * MAX_HT))) { return (IHEVCD_ERROR_T)IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED; } + // limit format conversions + if(ps_codec->e_chroma_fmt == IV_YUV_444P + && ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV444) + { + return (IHEVCD_ERROR_T)IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + } + if(ps_codec->e_chroma_fmt == IV_YUV_422P + && ps_sps->i1_chroma_format_idc != 
CHROMA_FMT_IDC_YUV422) + { + return (IHEVCD_ERROR_T)IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + } + if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV || ps_codec->e_chroma_fmt == IV_YUV_420SP_VU) + && ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV420) + { + return (IHEVCD_ERROR_T)IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + } + /* Update display width and display height */ { WORD32 disp_wd, disp_ht; @@ -1912,6 +2118,11 @@ IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec) crop_unit_x = 2; crop_unit_y = 2; } + else if(CHROMA_FMT_IDC_YUV422 == ps_sps->i1_chroma_format_idc) + { + crop_unit_x = 2; + crop_unit_y = 1; + } disp_wd = ps_sps->i2_pic_width_in_luma_samples; disp_wd -= ps_sps->i2_pic_crop_left_offset * crop_unit_x; @@ -2399,7 +2610,7 @@ IHEVCD_ERROR_T ihevcd_parse_pps(codec_t *ps_codec) if(ps_pps->i1_pps_scaling_list_data_present_flag) { COPY_DEFAULT_SCALING_LIST(ps_pps->pi2_scaling_mat); - ihevcd_scaling_list_data(ps_codec, ps_pps->pi2_scaling_mat); + ihevcd_scaling_list_data(ps_codec, ps_pps->pi2_scaling_mat, ps_sps->i1_chroma_format_idc); } BITS_PARSE("lists_modification_present_flag", value, ps_bitstrm, 1); @@ -2413,8 +2624,117 @@ IHEVCD_ERROR_T ihevcd_parse_pps(codec_t *ps_codec) BITS_PARSE("slice_header_extension_present_flag", value, ps_bitstrm, 1); ps_pps->i1_slice_header_extension_present_flag = value; - /* Not present in HM */ - BITS_PARSE("pps_extension_flag", value, ps_bitstrm, 1); + + BITS_PARSE("pps_extension_present_flag", value, ps_bitstrm, 1); + ps_pps->i1_pps_extension_present_flag = value; + + if(ps_pps->i1_pps_extension_present_flag) + { + BITS_PARSE("pps_range_extension_flag", value, ps_bitstrm, 1); + ps_pps->i1_pps_range_extension_flag = value; + + BITS_PARSE("pps_multilayer_extension_flag", value, ps_bitstrm, 1); + ps_pps->i1_pps_multilayer_extension_flag = value; + + BITS_PARSE("pps_3d_extension_flag", value, ps_bitstrm, 1); + ps_pps->i1_pps_3d_extension_flag = value; + + BITS_PARSE("pps_scc_extension_flag", value, ps_bitstrm, 1); + 
ps_pps->i1_pps_scc_extension_flag = value; + + BITS_PARSE("pps_extension_4bits", value, ps_bitstrm, 4); + ps_pps->i1_pps_extension_4bits = value; + } + +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_pps->i1_pps_range_extension_flag) + { + /* ITU-T H.265 Section 7.3.2.3.2 Range extension PPS syntax */ + if(ps_pps->i1_transform_skip_enabled_flag) + { + UEV_PARSE("log2_max_transform_skip_block_size_minus2", value, ps_bitstrm); + ps_pps->i1_log2_max_transform_skip_block_size_minus2 = value; + } + + BITS_PARSE("cross_component_prediction_enabled_flag", value, ps_bitstrm, 1); + ps_pps->i1_cross_component_prediction_enabled_flag = value; + + // Parse other bits to maintain bitstream alignment + BITS_PARSE("chroma_qp_offset_list_enabled_flag", value, ps_bitstrm, 1); + ps_pps->i1_chroma_qp_offset_list_enabled_flag = value; + + if(ps_pps->i1_chroma_qp_offset_list_enabled_flag) + { + UEV_PARSE("diff_cu_chroma_qp_offset_depth", value, ps_bitstrm); + ps_pps->i4_diff_cu_chroma_qp_offset_depth = value; + + UEV_PARSE("chroma_qp_offset_list_len_minus1", value, ps_bitstrm); + ps_pps->i4_chroma_qp_offset_list_len_minus1 = value; + if(ps_pps->i4_chroma_qp_offset_list_len_minus1 < 0 + || ps_pps->i4_chroma_qp_offset_list_len_minus1 > 5) + { + return IHEVCD_INVALID_PARAMETER; + } + + for(int i = 0; i <= ps_pps->i4_chroma_qp_offset_list_len_minus1; i++) + { + // cb_qp_offset_list[i] (se-v) + SEV_PARSE("cb_qp_offset_list", value, ps_bitstrm); + ps_pps->i4_cb_qp_offset_list[i] = value; + + // cr_qp_offset_list[i] (se-v) + SEV_PARSE("cr_qp_offset_list", value, ps_bitstrm); + ps_pps->i4_cr_qp_offset_list[i] = value; + } + } + + // log2_sao_offset_scale_luma (ue-v) + UEV_PARSE("log2_sao_ofst_scale_luma", value, ps_bitstrm); + ps_pps->i1_log2_sao_ofst_scale_luma = value; + + // log2_sao_offset_scale_chroma (ue-v) + UEV_PARSE("log2_sao_ofst_scale_chroma", value, ps_bitstrm); + ps_pps->i1_log2_sao_ofst_scale_chroma = value; + } + + WORD32 profile = ihevcd_get_profile(&ps_sps->s_ptl.s_ptl_gen); + 
if(profile != IHEVC_PROFILE_MAIN_REXT) + { + if(ps_pps->i1_log2_max_transform_skip_block_size_minus2 + || ps_pps->i1_cross_component_prediction_enabled_flag + || ps_pps->i1_chroma_qp_offset_list_enabled_flag + || ps_pps->i1_log2_sao_ofst_scale_luma + || ps_pps->i1_log2_sao_ofst_scale_chroma) + { + return IHEVCD_INVALID_PARAMETER; + } + } + if(ps_pps->i1_log2_sao_ofst_scale_luma + || ps_pps->i1_log2_sao_ofst_scale_chroma) + { + // main-rext 8-bit profiles require these fields to be off + return IHEVCD_INVALID_PARAMETER; + } + + if(ps_pps->i1_chroma_qp_offset_list_enabled_flag) + { + // TODO: decoder does not yet supports these tool-sets + return IHEVCD_UNSUPPORTED_TOOL_SET; + } + if(ps_pps->i1_pps_multilayer_extension_flag || ps_pps->i1_pps_3d_extension_flag + || ps_pps->i1_pps_scc_extension_flag) + { + // TODO: add support for parsing these syntax elements + return IHEVCD_UNSUPPORTED_TOOL_SET; + } +#else + if(ps_pps->i1_pps_range_extension_flag || ps_pps->i1_pps_multilayer_extension_flag + || ps_pps->i1_pps_3d_extension_flag || ps_pps->i1_pps_scc_extension_flag) + { + // TODO: add support for parsing these syntax elements + return IHEVCD_UNSUPPORTED_TOOL_SET; + } +#endif if((UWORD8 *)ps_bitstrm->pu4_buf > ps_bitstrm->pu1_buf_max) return IHEVCD_INVALID_PARAMETER; @@ -2457,6 +2777,7 @@ void ihevcd_copy_pps(codec_t *ps_codec, WORD32 pps_id, WORD32 pps_id_ref) } +#ifndef DISABLE_SEI IHEVCD_ERROR_T ihevcd_parse_buffering_period_sei(codec_t *ps_codec, sps_t *ps_sps) { @@ -3167,6 +3488,7 @@ IHEVCD_ERROR_T ihevcd_parse_sei(codec_t *ps_codec, nal_header_t *ps_nal) return ret; } +#endif /** ******************************************************************************* diff --git a/decoder/ihevcd_parse_headers.h b/decoder/ihevcd_parse_headers.h index 6e9870f..f6b6b5b 100644 --- a/decoder/ihevcd_parse_headers.h +++ b/decoder/ihevcd_parse_headers.h @@ -42,12 +42,14 @@ void ihevcd_copy_slice_hdr(codec_t *ps_codec, WORD32 slice_idx, WORD32 slice_idx IHEVCD_ERROR_T 
ihevcd_parse_vps(codec_t *ps_codec); IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec); IHEVCD_ERROR_T ihevcd_parse_pps(codec_t *ps_codec); +#ifndef DISABLE_SEI IHEVCD_ERROR_T ihevcd_parse_sei(codec_t *ps_codec, nal_header_t *ps_nal); IHEVCD_ERROR_T ihevcd_parse_pic_timing_sei(codec_t *ps_codec, sps_t *ps_sps); IHEVCD_ERROR_T ihevcd_parse_buffering_period_sei(codec_t *ps_codec, sps_t *ps_sps); IHEVCD_ERROR_T ihevcd_parse_time_code_sei(codec_t *ps_codec); IHEVCD_ERROR_T ihevcd_parse_user_data_registered_itu_t_t35(codec_t *ps_codec, UWORD32 u4_payload_size); IHEVCD_ERROR_T ihevcd_parse_active_parameter_sets_sei(codec_t *ps_codec, sps_t *ps_sps); +#endif IHEVCD_ERROR_T ihevcd_read_rbsp_trailing_bits(codec_t *ps_codec, UWORD32 u4_bits_left); IHEVCD_ERROR_T ihevcd_parse_slice_header(codec_t *ps_codec, nal_header_t *ps_nal); diff --git a/decoder/ihevcd_parse_residual.c b/decoder/ihevcd_parse_residual.c index 0a39f99..e89235f 100644 --- a/decoder/ihevcd_parse_residual.c +++ b/decoder/ihevcd_parse_residual.c @@ -185,7 +185,11 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, { IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; WORD32 transform_skip_flag; +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD32 explicit_rdpcm_flag, explicit_rdpcm_dir; +#endif WORD32 value; + sps_t *ps_sps; pps_t *ps_pps; WORD32 last_scan_pos, last_sub_blk; bitstrm_t *ps_bitstrm = &ps_codec->s_parse.s_bitstrm; @@ -205,13 +209,23 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, WORD32 sig_coeff_base_ctxt, abs_gt1_base_ctxt; UNUSED(x0); UNUSED(y0); + ps_sps = ps_codec->s_parse.ps_sps; ps_pps = ps_codec->s_parse.ps_pps; sign_data_hiding_flag = ps_pps->i1_sign_data_hiding_flag; transform_skip_flag = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + explicit_rdpcm_flag = 0; + explicit_rdpcm_dir = 0; +#endif if(ps_pps->i1_transform_skip_enabled_flag && !ps_codec->s_parse.s_cu.i4_cu_transquant_bypass && - (log2_trafo_size == 2)) +#ifdef ENABLE_MAIN_REXT_PROFILE + (log2_trafo_size <= 
ps_pps->i1_log2_max_transform_skip_block_size_minus2 + 2) +#else + (log2_trafo_size == 2) +#endif + ) { WORD32 ctxt_idx; @@ -231,6 +245,29 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, transform_skip_flag = value; } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(PRED_MODE_INTER == ps_codec->s_parse.s_cu.i4_pred_mode + && ps_sps->i1_explicit_rdpcm_enabled_flag + && (transform_skip_flag || ps_codec->s_parse.s_cu.i4_cu_transquant_bypass)) + + { + WORD32 ctxt_idx = IHEVC_CAB_EXPLICIT_RDPCM_FLAG + (c_idx != 0); + TRACE_CABAC_CTXT("explicit_rdpcm_flag", ps_cabac->u4_range, ctxt_idx); + value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); + AEV_TRACE("explicit_rdpcm_flag", value, ps_cabac->u4_range); + explicit_rdpcm_flag = value; + + if(explicit_rdpcm_flag) + { + ctxt_idx = IHEVC_CAB_EXPLICIT_RDPCM_DIR + (c_idx != 0); + TRACE_CABAC_CTXT("explicit_rdpcm_dir_flag", ps_cabac->u4_range, ctxt_idx); + value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); + AEV_TRACE("explicit_rdpcm_dir_flag", value, ps_cabac->u4_range); + explicit_rdpcm_dir = value; + } + } +#endif + /* code the last_coeff_x_prefix as tunary binarized code */ { WORD32 ctxt_idx_x, ctxt_idx_y, ctx_shift; @@ -311,7 +348,8 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, scan_idx = SCAN_DIAG_UPRIGHT; if(PRED_MODE_INTRA == ps_codec->s_parse.s_cu.i4_pred_mode) { - if((2 == log2_trafo_size) || ((3 == log2_trafo_size) && (0 == c_idx))) + int is_YUV444 = ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444; + if((2 == log2_trafo_size) || ((3 == log2_trafo_size) && (0 == c_idx || is_YUV444))) { if((6 <= intra_pred_mode) && (14 >= intra_pred_mode)) @@ -343,9 +381,15 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, /* This will be updated later */ *pi1_num_coded_subblks = 0; - /* Second WORD8 gives (scan idx << 1) | trans_skip */ pi1_scan_idx = pi1_buf++; +#ifdef ENABLE_MAIN_REXT_PROFILE + /* Second WORD8 gives (explicit_rdpcm_dir << 5) | (explicit_rdpcm_flag << 4) | (scan idx 
<< 1) | trans_skip */ + *pi1_scan_idx = (explicit_rdpcm_dir << 5) | (explicit_rdpcm_flag << 4) | (scan_idx << 1) + | transform_skip_flag; +#else + /* Second WORD8 gives (scan idx << 1) | trans_skip */ *pi1_scan_idx = (scan_idx << 1) | transform_skip_flag; +#endif /* Store the incremented pointer in pv_tu_coeff_data */ ps_codec->s_parse.pv_tu_coeff_data = pi1_buf; @@ -473,6 +517,9 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, WORD32 rice_param; WORD32 xs, ys; +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD8 i1_update_stats = ps_sps->i1_persistent_rice_adaptation_enabled_flag; +#endif sub_blk_pos = 0; if(i && (log2_trafo_size > 2)) @@ -572,7 +619,18 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, //coeff_pos = pu1_scan_coeff[n]; /* derive the context inc as per section 9.3.3.1.4 */ sig_ctxinc = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_transform_skip_context_enabled_flag + && (ps_codec->s_parse.s_cu.i4_cu_transquant_bypass + || transform_skip_flag)) + { + sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG; + sig_coeff_base_ctxt += (0 == c_idx) ? 42 : 43; + } + else if(2 == log2_trafo_size) +#else if(2 == log2_trafo_size) +#endif { /* 4x4 transform size increment uses lookup */ @@ -727,7 +785,26 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, /* At this level u4_sig_coeff_map is non-zero i.e. 
has atleast one non-zero coeff */ last_sig_scan_pos = (31 - CLZ(u4_sig_coeff_map)); first_sig_scan_pos = CTZ(u4_sig_coeff_map); - sign_hidden = (((last_sig_scan_pos - first_sig_scan_pos) > 3) && !ps_codec->s_parse.s_cu.i4_cu_transquant_bypass); + + if(ps_codec->s_parse.s_cu.i4_cu_transquant_bypass +#ifdef ENABLE_MAIN_REXT_PROFILE + || explicit_rdpcm_flag +#endif + || (PRED_MODE_INTRA == ps_codec->s_parse.s_cu.i4_pred_mode +#ifdef ENABLE_MAIN_REXT_PROFILE + && ps_sps->i1_implicit_rdpcm_enabled_flag +#else + && 0 +#endif + && transform_skip_flag + && (intra_pred_mode == 10 || intra_pred_mode == 26))) + { + sign_hidden = 0; + } + else + { + sign_hidden = ((last_sig_scan_pos - first_sig_scan_pos) > 3); + } u4_coeff_abs_level_greater2_map = 0; @@ -766,7 +843,22 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, num_sig_coeff = 0; sum_abs_level = 0; - rice_param = 0; + +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD32 sb_type = 2 * (c_idx == 0 ? 1 : 0); + if(ps_sps->i1_persistent_rice_adaptation_enabled_flag) + { + if(!(transform_skip_flag == 0 && ps_codec->s_parse.s_cu.i4_cu_transquant_bypass == 0)) + { + sb_type += 1; + } + rice_param = ps_cabac->ai4_rice_stat_coeff[sb_type] / 4; + } + else +#endif + { + rice_param = 0; + } { UWORD32 clz; UWORD32 u4_sig_coeff_map_shift; @@ -852,10 +944,36 @@ WORD32 ihevcd_parse_residual_coding(codec_t *ps_codec, } /* update the rice param based on coeff level */ - if((base_lvl > (3 << rice_param)) && (rice_param < 4)) + if(base_lvl > (3 << rice_param)) { - rice_param++; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_persistent_rice_adaptation_enabled_flag) + { + rice_param += 1; + } + else +#endif + { + rice_param = MIN((rice_param + 1), 4); + } } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(i1_update_stats) + { + if(coeff_abs_level_remaining + >= (3 << (ps_cabac->ai4_rice_stat_coeff[sb_type] / 4))) + { + ps_cabac->ai4_rice_stat_coeff[sb_type]++; + } + else if((2 * coeff_abs_level_remaining + < (1 << (ps_cabac->ai4_rice_stat_coeff[sb_type] / 
4))) + && ps_cabac->ai4_rice_stat_coeff[sb_type] > 0) + { + ps_cabac->ai4_rice_stat_coeff[sb_type]--; + } + i1_update_stats = 0; + } +#endif /* Compute absolute level */ level = base_lvl; diff --git a/decoder/ihevcd_parse_slice.c b/decoder/ihevcd_parse_slice.c index d6f74f9..1337d58 100644 --- a/decoder/ihevcd_parse_slice.c +++ b/decoder/ihevcd_parse_slice.c @@ -120,7 +120,8 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, WORD32 log2_trafo_size, WORD32 trafo_depth, WORD32 blk_idx, - WORD32 intra_pred_mode) + WORD32 intra_pred_mode, + WORD32 chroma_intra_pred_mode_idx) { IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS; sps_t *ps_sps; @@ -180,13 +181,30 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, { ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth] = 0; ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth] = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth] = 0; + ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth] = 0; + } +#endif } else { ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth] = ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth - 1]; ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth] = ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth - 1]; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth] = + ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth - 1]; + ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth] = + ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth - 1]; + } +#endif } - if(trafo_depth == 0 || log2_trafo_size > 2) + if ((CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && log2_trafo_size > 2) || + ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) { ctxt_idx = IHEVC_CAB_CBCR_IDX + trafo_depth; /* CBF for Cb/Cr is sent only if the parent CBF for Cb/Cr is non-zero */ @@ -196,6 +214,16 @@ WORD32 ihevcd_parse_transform_tree(codec_t 
*ps_codec, value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); AEV_TRACE("cbf_cb", value, ps_cabac->u4_range); ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth] = value; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 + && (!split_transform_flag || log2_trafo_size == 3)) + { + TRACE_CABAC_CTXT("cbf_cb", ps_cabac->u4_range, ctxt_idx); + value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); + AEV_TRACE("cbf_cb", value, ps_cabac->u4_range); + ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth] = value; + } +#endif } if((trafo_depth == 0) || ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth - 1]) @@ -204,11 +232,22 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); AEV_TRACE("cbf_cr", value, ps_cabac->u4_range); ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth] = value; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 + && (!split_transform_flag || log2_trafo_size == 3)) + { + TRACE_CABAC_CTXT("cbf_cr", ps_cabac->u4_range, ctxt_idx); + value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); + AEV_TRACE("cbf_cr", value, ps_cabac->u4_range); + ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth] = value; + } +#endif } } if(split_transform_flag) { WORD32 intra_pred_mode_tmp; + WORD32 chroma_intra_pred_mode_tmp_idx = ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[0]; x1 = x0 + ((1 << log2_trafo_size) >> 1); y1 = y0 + ((1 << log2_trafo_size) >> 1); @@ -217,19 +256,27 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, /* When depth is non-zero intra pred mode of parent node is sent */ /* This takes care of passing correct mode to all the child nodes */ intra_pred_mode_tmp = trafo_depth ? 
intra_pred_mode : ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0]; - ret = ihevcd_parse_transform_tree(ps_codec, x0, y0, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 0, intra_pred_mode_tmp); + if (ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + chroma_intra_pred_mode_tmp_idx = trafo_depth ? chroma_intra_pred_mode_idx : ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[0]; + ret = ihevcd_parse_transform_tree(ps_codec, x0, y0, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 0, intra_pred_mode_tmp, chroma_intra_pred_mode_tmp_idx); RETURN_IF((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret, ret); intra_pred_mode_tmp = trafo_depth ? intra_pred_mode : ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[1]; - ret = ihevcd_parse_transform_tree(ps_codec, x1, y0, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 1, intra_pred_mode_tmp); + if (ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + chroma_intra_pred_mode_tmp_idx = trafo_depth ? chroma_intra_pred_mode_idx : ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[1]; + ret = ihevcd_parse_transform_tree(ps_codec, x1, y0, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 1, intra_pred_mode_tmp, chroma_intra_pred_mode_tmp_idx); RETURN_IF((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret, ret); intra_pred_mode_tmp = trafo_depth ? intra_pred_mode : ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[2]; - ret = ihevcd_parse_transform_tree(ps_codec, x0, y1, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 2, intra_pred_mode_tmp); + if (ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + chroma_intra_pred_mode_tmp_idx = trafo_depth ? chroma_intra_pred_mode_idx : ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[2]; + ret = ihevcd_parse_transform_tree(ps_codec, x0, y1, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 2, intra_pred_mode_tmp, chroma_intra_pred_mode_tmp_idx); RETURN_IF((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret, ret); intra_pred_mode_tmp = trafo_depth ? 
intra_pred_mode : ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[3]; - ret = ihevcd_parse_transform_tree(ps_codec, x1, y1, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 3, intra_pred_mode_tmp); + if (ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + chroma_intra_pred_mode_tmp_idx = trafo_depth ? chroma_intra_pred_mode_idx : ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[3]; + ret = ihevcd_parse_transform_tree(ps_codec, x1, y1, x0, y0, log2_trafo_size - 1, trafo_depth + 1, 3, intra_pred_mode_tmp, chroma_intra_pred_mode_tmp_idx); RETURN_IF((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret, ret); } @@ -238,6 +285,7 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, WORD32 ctb_x_base; WORD32 ctb_y_base; WORD32 cu_qp_delta_abs; + WORD32 cbf_chroma; @@ -245,11 +293,19 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, cu_qp_delta_abs = 0; ctb_x_base = ps_codec->s_parse.i4_ctb_x << ps_sps->i1_log2_ctb_size; ctb_y_base = ps_codec->s_parse.i4_ctb_y << ps_sps->i1_log2_ctb_size; + cbf_chroma = ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth] + || ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth]; +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + cbf_chroma |= ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth] + || ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth]; + } +#endif if((ps_codec->s_parse.s_cu.i4_pred_mode == PRED_MODE_INTRA) || (trafo_depth != 0) || - (ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth]) || - (ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth])) + (cbf_chroma)) { ctxt_idx = IHEVC_CAB_CBF_LUMA_IDX; ctxt_idx += (trafo_depth == 0) ? 
1 : 0; @@ -268,21 +324,31 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, /* Initialize ps_tu to default values */ /* If required change this to WORD32 packed write */ ps_tu->b1_cb_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cb_cbf_subtu1 = 0; +#endif ps_tu->b1_cr_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cr_cbf_subtu1 = 0; +#endif ps_tu->b1_y_cbf = 0; ps_tu->b4_pos_x = ((x0 - ctb_x_base) >> 2); ps_tu->b4_pos_y = ((y0 - ctb_y_base) >> 2); ps_tu->b1_transquant_bypass = ps_codec->s_parse.s_cu.i4_cu_transquant_bypass; ps_tu->b3_size = (log2_trafo_size - 2); ps_tu->b7_qp = ps_codec->s_parse.u4_qp; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b3_cb_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cb_log2_res_sign = 0; + ps_tu->b3_cr_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cr_log2_res_sign = 0; +#endif ps_tu->b6_luma_intra_mode = intra_pred_mode; - ps_tu->b3_chroma_intra_mode_idx = ps_codec->s_parse.s_cu.i4_intra_chroma_pred_mode_idx; + ps_tu->b3_chroma_intra_mode_idx = chroma_intra_pred_mode_idx; /* Section:7.3.12 Transform unit syntax inlined here */ - if(ps_codec->s_parse.s_cu.i1_cbf_luma || - ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth] || - ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth]) + if(ps_codec->s_parse.s_cu.i1_cbf_luma || cbf_chroma) { WORD32 intra_pred_mode_chroma; if(ps_pps->i1_cu_qp_delta_enabled_flag && !ps_codec->s_parse.i4_is_cu_qp_delta_coded) @@ -338,32 +404,98 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size, 0, intra_pred_mode); } - if(4 == ps_codec->s_parse.s_cu.i4_intra_chroma_pred_mode_idx) - intra_pred_mode_chroma = ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0]; + WORD32 chroma_blk_luma_intra_pred_mode = + ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 
+ intra_pred_mode : + ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0]; + if(4 == chroma_intra_pred_mode_idx) + intra_pred_mode_chroma = chroma_blk_luma_intra_pred_mode; else { - intra_pred_mode_chroma = gau1_intra_pred_chroma_modes[ps_codec->s_parse.s_cu.i4_intra_chroma_pred_mode_idx]; + intra_pred_mode_chroma = gau1_intra_pred_chroma_modes[chroma_intra_pred_mode_idx]; - if(intra_pred_mode_chroma == - ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0]) + if(intra_pred_mode_chroma == chroma_blk_luma_intra_pred_mode) { intra_pred_mode_chroma = INTRA_ANGULAR(34); } - } - if(log2_trafo_size > 2) + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) { + intra_pred_mode_chroma = gau1_intra_pred_chroma_modes_422[intra_pred_mode_chroma]; + } + if(log2_trafo_size > 2 || ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + WORD32 trafo_offset = (ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 0 : 1); + WORD32 log2_trafo_size_c = MAX(2, log2_trafo_size - trafo_offset); + +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_pps->i1_cross_component_prediction_enabled_flag + && ps_codec->s_parse.s_cu.i1_cbf_luma + && (ps_codec->s_parse.s_cu.i4_pred_mode == PRED_MODE_INTER + || chroma_intra_pred_mode_idx == 4)) + { + ctxt_idx = IHEVC_CAB_CCP_LOG2_RES_ABS; + TRACE_CABAC_CTXT("log2_res_scale_abs_plus1", ps_cabac->u4_range, ctxt_idx); + value = ihevcd_cabac_decode_bins_tunary(ps_cabac, ps_bitstrm, 4, ctxt_idx, + 0, 3); + AEV_TRACE("log2_res_scale_abs_plus1", value, ps_cabac->u4_range); + + if(value != 0) + { + ctxt_idx = IHEVC_CAB_CCP_RES_SIGN_FLAG; + TRACE_CABAC_CTXT("res_scale_sign_flag", ps_cabac->u4_range, ctxt_idx); + ps_tu->b1_cb_log2_res_sign = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); + AEV_TRACE("res_scale_sign_flag", value, ps_cabac->u4_range); + ps_tu->b3_cb_log2_res_scale_abs_plus1 = value; + } + } +#endif if(ps_codec->s_parse.s_cu.ai1_cbf_cb[trafo_depth]) { ps_tu->b1_cb_cbf = 1; - ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size 
- 1, 1, intra_pred_mode_chroma); + ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size_c, 1, intra_pred_mode_chroma); } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 && ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth]) + { + ps_tu->b1_cb_cbf_subtu1 = 1; + ihevcd_parse_residual_coding(ps_codec, x0, y0 + (1 << log2_trafo_size_c), log2_trafo_size_c, 1, intra_pred_mode_chroma); + } +#endif +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_pps->i1_cross_component_prediction_enabled_flag + && ps_codec->s_parse.s_cu.i1_cbf_luma + && (ps_codec->s_parse.s_cu.i4_pred_mode == PRED_MODE_INTER + || chroma_intra_pred_mode_idx == 4)) + { + ctxt_idx = IHEVC_CAB_CCP_LOG2_RES_ABS + 4; + TRACE_CABAC_CTXT("log2_res_scale_abs_plus1", ps_cabac->u4_range, ctxt_idx); + value = ihevcd_cabac_decode_bins_tunary(ps_cabac, ps_bitstrm, 4, ctxt_idx, + 0, 3); + AEV_TRACE("log2_res_scale_abs_plus1", value, ps_cabac->u4_range); + if(value != 0) + { + ctxt_idx = IHEVC_CAB_CCP_RES_SIGN_FLAG + 1; + TRACE_CABAC_CTXT("res_scale_sign_flag", ps_cabac->u4_range, ctxt_idx); + ps_tu->b1_cr_log2_res_sign = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, ctxt_idx); + AEV_TRACE("res_scale_sign_flag", value, ps_cabac->u4_range); + ps_tu->b3_cr_log2_res_scale_abs_plus1 = value; + } + } +#endif if(ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth]) { ps_tu->b1_cr_cbf = 1; - ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size - 1, 2, intra_pred_mode_chroma); + ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size_c, 2, intra_pred_mode_chroma); } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 && ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth]) + { + ps_tu->b1_cr_cbf_subtu1 = 1; + ihevcd_parse_residual_coding(ps_codec, x0, y0 + (1 << log2_trafo_size_c), log2_trafo_size_c, 1, intra_pred_mode_chroma); + } +#endif } else if(blk_idx == 3) { @@ -372,12 +504,26 @@ WORD32 ihevcd_parse_transform_tree(codec_t 
*ps_codec, ps_tu->b1_cb_cbf = 1; ihevcd_parse_residual_coding(ps_codec, cu_x_base, cu_y_base, log2_trafo_size, 1, intra_pred_mode_chroma); } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 && ps_codec->s_parse.s_cu.ai1_cbf_cb_subtu[trafo_depth]) + { + ps_tu->b1_cb_cbf_subtu1 = 1; + ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size, 1, intra_pred_mode_chroma); + } +#endif if(ps_codec->s_parse.s_cu.ai1_cbf_cr[trafo_depth]) { ps_tu->b1_cr_cbf = 1; ihevcd_parse_residual_coding(ps_codec, cu_x_base, cu_y_base, log2_trafo_size, 2, intra_pred_mode_chroma); } +#ifdef ENABLE_MAIN_REXT_PROFILE + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 && ps_codec->s_parse.s_cu.ai1_cbf_cr_subtu[trafo_depth]) + { + ps_tu->b1_cr_cbf_subtu1 = 1; + ihevcd_parse_residual_coding(ps_codec, x0, y0, log2_trafo_size, 1, intra_pred_mode_chroma); + } +#endif } else { @@ -387,7 +533,7 @@ WORD32 ihevcd_parse_transform_tree(codec_t *ps_codec, } else { - if((3 != blk_idx) && (2 == log2_trafo_size)) + if((3 != blk_idx) && (2 == log2_trafo_size && ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV444)) { ps_tu->b3_chroma_intra_mode_idx = INTRA_PRED_CHROMA_IDX_NONE; } @@ -590,7 +736,8 @@ IHEVCD_ERROR_T ihevcd_parse_pcm_sample(codec_t *ps_codec, num_bits = ps_sps->i1_pcm_sample_bit_depth_luma; - for(i = 0; i < 1 << (log2_cb_size << 1); i++) + WORD32 luma_samples = 1 << (log2_cb_size << 1); + for(i = 0; i < luma_samples; i++) { TRACE_CABAC_CTXT("pcm_sample_luma", ps_cabac->u4_range, 0); BITS_PARSE("pcm_sample_luma", value, ps_bitstrm, num_bits); @@ -599,15 +746,25 @@ IHEVCD_ERROR_T ihevcd_parse_pcm_sample(codec_t *ps_codec, *pu1_coeff_data++ = value << (BIT_DEPTH_LUMA - num_bits); } - num_bits = ps_sps->i1_pcm_sample_bit_depth_chroma; - - for(i = 0; i < (1 << (log2_cb_size << 1)) >> 1; i++) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) { - TRACE_CABAC_CTXT("pcm_sample_chroma", ps_cabac->u4_range, 0); - 
BITS_PARSE("pcm_sample_chroma", value, ps_bitstrm, num_bits); + WORD32 chroma_samples = 0; - // ps_pcmsample_t->i1_pcm_sample_chroma[i] = value; - *pu1_coeff_data++ = value << (BIT_DEPTH_CHROMA - num_bits); + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + chroma_samples = luma_samples << 1; + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + chroma_samples = luma_samples; + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) + chroma_samples = luma_samples >> 1; + num_bits = ps_sps->i1_pcm_sample_bit_depth_chroma; + for(i = 0; i < chroma_samples; i++) + { + TRACE_CABAC_CTXT("pcm_sample_chroma", ps_cabac->u4_range, 0); + BITS_PARSE("pcm_sample_chroma", value, ps_bitstrm, num_bits); + + // ps_pcmsample_t->i1_pcm_sample_chroma[i] = value; + *pu1_coeff_data++ = value << (BIT_DEPTH_CHROMA - num_bits); + } } ps_codec->s_parse.pv_tu_coeff_data = pu1_coeff_data; @@ -1016,7 +1173,13 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit_intra(codec_t *ps_codec, ps_tu = ps_codec->s_parse.ps_tu; ps_tu->b1_cb_cbf = 1; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cb_cbf_subtu1 = 1; +#endif ps_tu->b1_cr_cbf = 1; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cr_cbf_subtu1 = 1; +#endif ps_tu->b1_y_cbf = 1; ps_tu->b4_pos_x = ((x0 - ctb_x_base) >> 2); ps_tu->b4_pos_y = ((y0 - ctb_y_base) >> 2); @@ -1025,6 +1188,12 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit_intra(codec_t *ps_codec, ps_tu->b7_qp = ps_codec->s_parse.u4_qp; ps_tu->b3_chroma_intra_mode_idx = INTRA_PRED_CHROMA_IDX_NONE; ps_tu->b6_luma_intra_mode = INTRA_PRED_NONE; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b3_cb_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cb_log2_res_sign = 0; + ps_tu->b3_cr_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cr_log2_res_sign = 0; +#endif /* Set the first TU in CU flag */ { @@ -1116,23 +1285,52 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit_intra(codec_t *ps_codec, } cnt++; } - TRACE_CABAC_CTXT("intra_chroma_pred_mode", ps_cabac->u4_range, IHEVC_CAB_CHROMA_PRED_MODE); - value = 
ihevcd_cabac_decode_bin(ps_cabac, - ps_bitstrm, - IHEVC_CAB_CHROMA_PRED_MODE); - ps_codec->s_parse.s_cu.i4_intra_chroma_pred_mode_idx = 4; - if(value) + if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) { - ps_codec->s_parse.s_cu.i4_intra_chroma_pred_mode_idx = - ihevcd_cabac_decode_bypass_bins(ps_cabac, - ps_bitstrm, 2); + for(i = 0; i < part_cnt; i++) + { + TRACE_CABAC_CTXT("intra_chroma_pred_mode", ps_cabac->u4_range, IHEVC_CAB_CHROMA_PRED_MODE); + value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, + IHEVC_CAB_CHROMA_PRED_MODE); + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[i] = 4; + if(value) + { + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[i] = + ihevcd_cabac_decode_bypass_bins(ps_cabac, + ps_bitstrm, + 2); + } + AEV_TRACE("intra_chroma_pred_mode", + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[i], + ps_cabac->u4_range); + } } - AEV_TRACE("intra_chroma_pred_mode", - ps_codec->s_parse.s_cu.i4_intra_chroma_pred_mode_idx, - ps_cabac->u4_range); - + else if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + TRACE_CABAC_CTXT("intra_chroma_pred_mode", ps_cabac->u4_range, IHEVC_CAB_CHROMA_PRED_MODE); + value = ihevcd_cabac_decode_bin(ps_cabac, ps_bitstrm, + IHEVC_CAB_CHROMA_PRED_MODE); + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[0] = 4; + if (value) { + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[0] = + ihevcd_cabac_decode_bypass_bins(ps_cabac, ps_bitstrm, + 2); + } + AEV_TRACE("intra_chroma_pred_mode", + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[0], + ps_cabac->u4_range); + } ihevcd_intra_pred_mode_prediction(ps_codec, log2_cb_size, x0, y0); + + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && part_mode != PART_NxN) + { + // Only required for YUV444, but done for all formats to simplify calling arguments for ihevcd_parse_transform_tree + parse_cu_t *ps_cu = &ps_codec->s_parse.s_cu; + ps_cu->ai4_intra_chroma_pred_mode_idx[1] = 
ps_cu->ai4_intra_chroma_pred_mode_idx[0]; + ps_cu->ai4_intra_chroma_pred_mode_idx[2] = ps_cu->ai4_intra_chroma_pred_mode_idx[0]; + ps_cu->ai4_intra_chroma_pred_mode_idx[3] = ps_cu->ai4_intra_chroma_pred_mode_idx[0]; + } } STATS_UPDATE_PU_SIZE(ps_pu); /* Increment PU pointer */ @@ -1328,7 +1526,13 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit(codec_t *ps_codec, ctb_y_base = ps_codec->s_parse.i4_ctb_y << ps_sps->i1_log2_ctb_size; ps_tu->b1_cb_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cb_cbf_subtu1 = 0; +#endif ps_tu->b1_cr_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cr_cbf_subtu1 = 0; +#endif ps_tu->b1_y_cbf = 0; ps_tu->b4_pos_x = ((x0 - ctb_x_base) >> 2); ps_tu->b4_pos_y = ((y0 - ctb_y_base) >> 2); @@ -1337,6 +1541,12 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit(codec_t *ps_codec, ps_tu->b7_qp = ps_codec->s_parse.u4_qp; ps_tu->b3_chroma_intra_mode_idx = INTRA_PRED_CHROMA_IDX_NONE; ps_tu->b6_luma_intra_mode = INTRA_PRED_NONE; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b3_cb_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cb_log2_res_sign = 0; + ps_tu->b3_cr_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cr_log2_res_sign = 0; +#endif /* Set the first TU in CU flag */ { @@ -1609,7 +1819,8 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit(codec_t *ps_codec, (ps_sps->i1_max_transform_hierarchy_depth_inter); ret = ihevcd_parse_transform_tree(ps_codec, x0, y0, x0, y0, log2_cb_size, 0, 0, - ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0]); + ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0], + ps_codec->s_parse.s_cu.ai4_intra_chroma_pred_mode_idx[0]); RETURN_IF((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret, ret); } else @@ -1622,7 +1833,13 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit(codec_t *ps_codec, ps_tu = ps_codec->s_parse.ps_tu; ps_tu->b1_cb_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cb_cbf_subtu1 = 0; +#endif ps_tu->b1_cr_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cr_cbf_subtu1 = 0; +#endif ps_tu->b1_y_cbf = 0; ps_tu->b4_pos_x = ((x0 - ctb_x_base) >> 
2); ps_tu->b4_pos_y = ((y0 - ctb_y_base) >> 2); @@ -1631,6 +1848,12 @@ IHEVCD_ERROR_T ihevcd_parse_coding_unit(codec_t *ps_codec, ps_tu->b7_qp = ps_codec->s_parse.u4_qp; ps_tu->b3_chroma_intra_mode_idx = INTRA_PRED_CHROMA_IDX_NONE; ps_tu->b6_luma_intra_mode = ps_codec->s_parse.s_cu.ai4_intra_luma_pred_mode[0]; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b3_cb_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cb_log2_res_sign = 0; + ps_tu->b3_cr_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cr_log2_res_sign = 0; +#endif /* Set the first TU in CU flag */ { @@ -2070,7 +2293,7 @@ IHEVCD_ERROR_T ihevcd_parse_sao(codec_t *ps_codec) { WORD32 c_idx; WORD32 sao_type_idx = 0; - for(c_idx = 0; c_idx < 3; c_idx++) + for(c_idx = 0; c_idx < (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc ? 3 : 1); c_idx++) { if((ps_slice_hdr->i1_slice_sao_luma_flag && c_idx == 0) || (ps_slice_hdr->i1_slice_sao_chroma_flag && c_idx > 0)) { @@ -2250,7 +2473,13 @@ void ihevcd_set_ctb_skip(codec_t *ps_codec) { ps_tu = ps_codec->s_parse.ps_tu; ps_tu->b1_cb_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cb_cbf_subtu1 = 0; +#endif ps_tu->b1_cr_cbf = 0; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b1_cr_cbf_subtu1 = 0; +#endif ps_tu->b1_y_cbf = 0; ps_tu->b4_pos_x = pu_x >> 2; ps_tu->b4_pos_y = pu_y >> 2; @@ -2259,6 +2488,12 @@ void ihevcd_set_ctb_skip(codec_t *ps_codec) ps_tu->b7_qp = ps_codec->s_parse.u4_qp; ps_tu->b3_chroma_intra_mode_idx = INTRA_PRED_CHROMA_IDX_NONE; ps_tu->b6_luma_intra_mode = INTRA_PRED_NONE; +#ifdef ENABLE_MAIN_REXT_PROFILE + ps_tu->b3_cb_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cb_log2_res_sign = 0; + ps_tu->b3_cr_log2_res_scale_abs_plus1 = 0; + ps_tu->b1_cr_log2_res_sign = 0; +#endif ps_tu->b1_first_tu_in_cu = 1; ps_codec->s_parse.ps_tu++; @@ -2506,11 +2741,19 @@ IHEVCD_ERROR_T ihevcd_parse_slice_data(codec_t *ps_codec) } else if((0 == ps_pps->i1_entropy_coding_sync_enabled_flag) || (ps_pps->i1_entropy_coding_sync_enabled_flag && (0 != ps_codec->s_parse.i4_ctb_x))) { +#ifdef 
ENABLE_MAIN_REXT_PROFILE + WORD32 ai4_stats[4] = {0}; +#endif ret = ihevcd_cabac_init(&ps_codec->s_parse.s_cabac, &ps_codec->s_parse.s_bitstrm, slice_qp, cabac_init_idc, - &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0]); + &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0] +#ifdef ENABLE_MAIN_REXT_PROFILE + , + ps_sps->i1_persistent_rice_adaptation_enabled_flag ? ai4_stats : NULL +#endif + ); if(ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS) { ps_codec->i4_slice_error = 1; @@ -2602,11 +2845,19 @@ IHEVCD_ERROR_T ihevcd_parse_slice_data(codec_t *ps_codec) * of whether it is a dependent or an independent slice */ if(0 == ps_codec->i4_slice_error) { +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD32 ai4_stats[4] = {0}; +#endif ret = ihevcd_cabac_init(&ps_codec->s_parse.s_cabac, &ps_codec->s_parse.s_bitstrm, slice_qp, cabac_init_idc, - &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0]); + &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0] +#ifdef ENABLE_MAIN_REXT_PROFILE + , + ps_sps->i1_persistent_rice_adaptation_enabled_flag ? ai4_stats : NULL +#endif + ); if(ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS) { ps_codec->i4_slice_error = 1; @@ -2677,12 +2928,20 @@ IHEVCD_ERROR_T ihevcd_parse_slice_data(codec_t *ps_codec) ps_codec->s_parse.u4_qp = slice_qp; if(default_ctxt) { +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD32 ai4_stats[4] = {0}; +#endif //memcpy(&ps_codec->s_parse.s_cabac.au1_ctxt_models, &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0], size); ret = ihevcd_cabac_init(&ps_codec->s_parse.s_cabac, &ps_codec->s_parse.s_bitstrm, slice_qp, cabac_init_idc, - &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0]); + &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0] +#ifdef ENABLE_MAIN_REXT_PROFILE + , + ps_sps->i1_persistent_rice_adaptation_enabled_flag ? 
ai4_stats : NULL +#endif + ); if(ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS) { @@ -2698,7 +2957,14 @@ IHEVCD_ERROR_T ihevcd_parse_slice_data(codec_t *ps_codec) &ps_codec->s_parse.s_bitstrm, slice_qp, cabac_init_idc, - (const UWORD8 *)&ps_codec->s_parse.s_cabac.au1_ctxt_models_sync); + (const UWORD8 *)&ps_codec->s_parse.s_cabac.au1_ctxt_models_sync +#ifdef ENABLE_MAIN_REXT_PROFILE + , + ps_sps->i1_persistent_rice_adaptation_enabled_flag ? + ps_codec->s_parse.s_cabac.ai4_rice_stat_coeff_sync : + NULL +#endif + ); if(ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS) { @@ -2809,6 +3075,11 @@ IHEVCD_ERROR_T ihevcd_parse_slice_data(codec_t *ps_codec) { WORD32 size = sizeof(ps_codec->s_parse.s_cabac.au1_ctxt_models); memcpy(&ps_codec->s_parse.s_cabac.au1_ctxt_models_sync, &ps_codec->s_parse.s_cabac.au1_ctxt_models, size); + +#ifdef ENABLE_MAIN_REXT_PROFILE + size = sizeof(ps_codec->s_parse.s_cabac.ai4_rice_stat_coeff_sync); + memcpy(&ps_codec->s_parse.s_cabac.ai4_rice_stat_coeff_sync, &ps_codec->s_parse.s_cabac.ai4_rice_stat_coeff, size); +#endif } } diff --git a/decoder/ihevcd_parse_slice_header.c b/decoder/ihevcd_parse_slice_header.c index e452488..4f63658 100644 --- a/decoder/ihevcd_parse_slice_header.c +++ b/decoder/ihevcd_parse_slice_header.c @@ -403,6 +403,7 @@ IHEVCD_ERROR_T ihevcd_parse_slice_header(codec_t *ps_codec, (ps_slice_hdr->i1_slice_type > 2)) return IHEVCD_IGNORE_SLICE; + if(ps_pps->i1_output_flag_present_flag) { BITS_PARSE("pic_output_flag", value, ps_bitstrm, 1); @@ -440,6 +441,9 @@ IHEVCD_ERROR_T ihevcd_parse_slice_header(codec_t *ps_codec, { numbits = 32 - CLZ(ps_sps->i1_num_short_term_ref_pic_sets - 1); BITS_PARSE("short_term_ref_pic_set_idx", value, ps_bitstrm, numbits); + if (value >= ps_sps->i1_num_short_term_ref_pic_sets) { + return IHEVCD_INVALID_PARAMETER; + } ps_slice_hdr->i1_short_term_ref_pic_set_idx = value; } @@ -566,8 +570,10 @@ IHEVCD_ERROR_T ihevcd_parse_slice_header(codec_t *ps_codec, BITS_PARSE("slice_sao_luma_flag", value, ps_bitstrm, 1); 
ps_slice_hdr->i1_slice_sao_luma_flag = value; - BITS_PARSE("slice_sao_chroma_flag", value, ps_bitstrm, 1); - ps_slice_hdr->i1_slice_sao_chroma_flag = value; + if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) { + BITS_PARSE("slice_sao_chroma_flag", value, ps_bitstrm, 1); + ps_slice_hdr->i1_slice_sao_chroma_flag = value; + } } @@ -805,11 +811,12 @@ IHEVCD_ERROR_T ihevcd_parse_slice_header(codec_t *ps_codec, if(ps_codec->i4_pic_present) { prev_slice_incomplete_flag = 1; + ps_codec->i4_slice_error = 1; + ps_codec->s_parse.i4_cur_slice_idx--; + if(ps_codec->s_parse.i4_cur_slice_idx < 0) + ps_codec->s_parse.i4_cur_slice_idx = 0; } - else - { - return IHEVCD_IGNORE_SLICE; - } + return IHEVCD_IGNORE_SLICE; } /* If the slice address is less than the next CTB's index, * extra CTBs have been decoded in the previous slice. diff --git a/decoder/ihevcd_process_slice.c b/decoder/ihevcd_process_slice.c index 50b2c00..4da1fb2 100644 --- a/decoder/ihevcd_process_slice.c +++ b/decoder/ihevcd_process_slice.c @@ -536,6 +536,13 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) WORD32 ctb_size = 1 << ps_sps->i1_log2_ctb_size; + WORD32 h_samp_factor, v_samp_factor; + WORD32 chroma_row_strd; + WORD32 chroma_pixel_strd = 2; + + h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; + v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 
2 : 1; + PROFILE_DISABLE_PROCESS_CTB(); ps_codec = ps_proc->ps_codec; @@ -968,15 +975,17 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) /*TODO: Add support for custom scaling matrices */ } - /* CTB Level pointers */ ps_proc->pu1_cur_ctb_luma = ps_proc->pu1_cur_pic_luma + (ps_proc->i4_ctb_x * ctb_size + ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd); - ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma - + ps_proc->i4_ctb_x * ctb_size - + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma + + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor) + + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd * chroma_pixel_strd / (h_samp_factor * v_samp_factor)); + } ihevcd_iquant_itrans_recon_ctb(ps_proc); } @@ -1201,6 +1210,8 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) /* Call padding if required */ { + chroma_row_strd = ps_codec->i4_strd * chroma_pixel_strd / h_samp_factor; + #if SAO_PROCESS_SHIFT_CTB if(0 == ps_proc->i4_ctb_x) @@ -1212,16 +1223,21 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) + (ps_proc->i4_ctb_x * ctb_size + ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd); - ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma - + ps_proc->i4_ctb_x * ctb_size - + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); pad_ht_luma = ctb_size; pad_ht_luma += (ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y ? 
8 : 0; - pad_ht_chroma = ctb_size / 2; /* Pad left after 1st CTB is processed */ ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_ctb_luma - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_ctb_chroma - 16 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma + + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor) + + (ps_proc->i4_ctb_y * ctb_size * chroma_row_strd / v_samp_factor); + pad_ht_chroma = ctb_size / v_samp_factor; + ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr( + ps_proc->pu1_cur_ctb_chroma - (8 * v_samp_factor) * chroma_row_strd, + chroma_row_strd, pad_ht_chroma, PAD_LEFT * chroma_pixel_strd / h_samp_factor); + } } if((ps_sps->i2_pic_wd_in_ctb - 1) == ps_proc->i4_ctb_x) @@ -1234,22 +1250,35 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) + (ps_proc->i4_ctb_x * ctb_size + ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd); - ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma - + ps_proc->i4_ctb_x * ctb_size - + (ps_proc->i4_ctb_y * ctb_size * ps_codec->i4_strd / 2); - + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_proc->pu1_cur_ctb_chroma = ps_proc->pu1_cur_pic_chroma + + (ps_proc->i4_ctb_x * ctb_size * chroma_pixel_strd / h_samp_factor) + + (ps_proc->i4_ctb_y * ctb_size * chroma_row_strd / v_samp_factor); + pad_ht_chroma = ctb_size / v_samp_factor; + } pad_ht_luma = ctb_size; - pad_ht_chroma = ctb_size / 2; if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) { pad_ht_luma += 8; - pad_ht_chroma += 16; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_pic_chroma + (ps_sps->i2_pic_height_in_luma_samples / 2 - 16) * ps_codec->i4_strd, - ps_codec->i4_strd, 16, PAD_LEFT); + if (CHROMA_FMT_IDC_MONOCHROME != 
ps_sps->i1_chroma_format_idc) + { + pad_ht_chroma += (8 * v_samp_factor); + ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr( + ps_proc->pu1_cur_pic_chroma + ((ps_sps->i2_pic_height_in_luma_samples / v_samp_factor) - (8 * v_samp_factor)) * chroma_row_strd, + chroma_row_strd, (8 * v_samp_factor), + PAD_LEFT * chroma_pixel_strd / h_samp_factor); + } } /* Pad right after last CTB in the current row is processed */ ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(ps_proc->pu1_cur_ctb_luma + cols_remaining - 8 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT); - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(ps_proc->pu1_cur_ctb_chroma + cols_remaining - 16 * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr( + ps_proc->pu1_cur_ctb_chroma + (cols_remaining * chroma_pixel_strd / h_samp_factor) - (8 * v_samp_factor) * chroma_row_strd, + chroma_row_strd, pad_ht_chroma, + PAD_RIGHT * chroma_pixel_strd / h_samp_factor); + } if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) { @@ -1258,14 +1287,27 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) /* Hence moving top padding to to end of frame, Moving it to second row also results in problems when there is only one row */ /* Pad top after padding left and right for current rows after processing 1st CTB row */ ihevc_pad_top(ps_proc->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP); - ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT * (chroma_pixel_strd / h_samp_factor), + chroma_row_strd, + (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor), + PAD_TOP / 
v_samp_factor); + } + /* Pad bottom after padding left and right for current rows after processing 1st CTB row */ pu1_buf = ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT; - /* Pad top after padding left and right for current rows after processing 1st CTB row */ ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT); - - pu1_buf = ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; - ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + pu1_buf = ps_proc->pu1_cur_pic_chroma + + chroma_row_strd * (ps_sps->i2_pic_height_in_luma_samples / v_samp_factor) + - (PAD_LEFT * chroma_pixel_strd / h_samp_factor); + ihevc_pad_bottom(pu1_buf, + chroma_row_strd, + (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor), + PAD_BOT / v_samp_factor); + } } } #else @@ -1280,7 +1322,10 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) pad_ht_chroma = ctb_size / 2; /* Pad left after 1st CTB is processed */ ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_ctb_luma - 2 * ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_ctb_chroma - ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_ctb_chroma - ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + } } else if((ps_sps->i2_pic_wd_in_ctb - 1) == ps_proc->i4_ctb_x) { @@ -1292,7 +1337,10 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) pad_ht_chroma = ((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) ? 
3 * ctb_size / 2 : ctb_size / 2; /* Pad right after last CTB in the current row is processed */ ps_codec->s_func_selector.ihevc_pad_right_luma_fptr(ps_proc->pu1_cur_ctb_luma + cols_remaining - 2 * ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_luma, PAD_RIGHT); - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(ps_proc->pu1_cur_ctb_chroma + cols_remaining - ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr(ps_proc->pu1_cur_ctb_chroma + cols_remaining - ctb_size * ps_codec->i4_strd, ps_codec->i4_strd, pad_ht_chroma, PAD_RIGHT); + } if((ps_sps->i2_pic_ht_in_ctb - 1) == ps_proc->i4_ctb_y) { @@ -1305,21 +1353,30 @@ IHEVCD_ERROR_T ihevcd_process(process_ctxt_t *ps_proc) ps_codec->s_func_selector.ihevc_pad_left_luma_fptr(ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples - 2 * ctb_size), ps_codec->i4_strd, pad_ht_luma, PAD_LEFT); - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2 - ctb_size), - ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr(ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2 - ctb_size), + ps_codec->i4_strd, pad_ht_chroma, PAD_LEFT); + } /* Since SAO is shifted by 8x8, chroma padding can not be done till second row is processed */ /* Hence moving top padding to to end of frame, Moving it to second row also results in problems when there is only one row */ /* Pad top after padding left and right for current rows after processing 1st CTB row */ ihevc_pad_top(ps_proc->pu1_cur_pic_luma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP); - 
ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + ihevc_pad_top(ps_proc->pu1_cur_pic_chroma - PAD_LEFT, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_TOP / 2); + } pu1_buf = ps_proc->pu1_cur_pic_luma + ps_codec->i4_strd * ps_sps->i2_pic_height_in_luma_samples - PAD_LEFT; /* Pad top after padding left and right for current rows after processing 1st CTB row */ ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT); - pu1_buf = ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; - ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) + { + pu1_buf = ps_proc->pu1_cur_pic_chroma + ps_codec->i4_strd * (ps_sps->i2_pic_height_in_luma_samples / 2) - PAD_LEFT; + ihevc_pad_bottom(pu1_buf, ps_codec->i4_strd, ps_sps->i2_pic_width_in_luma_samples + PAD_WD, PAD_BOT / 2); + } } } } @@ -1596,31 +1653,32 @@ void ihevcd_process_thread(process_ctxt_t *ps_proc) ithread_set_affinity(ps_proc->i4_id + 1); } -#ifdef KEEP_THREADS_ACTIVE while(1) { codec_t *ps_dec = ps_proc->ps_codec; - DEBUG("In ihevcd_process_thread \n"); - - ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); - if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) - break; - - while(!ps_dec->ai4_process_start[ps_proc->i4_id]) + if(ps_proc->ps_codec->i4_threads_active) { - ithread_cond_wait(ps_dec->apv_proc_start_condition[ps_proc->i4_id], - ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); + DEBUG("In ihevcd_process_thread \n"); + + ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); + if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) + break; + + while(!ps_dec->ai4_process_start[ps_proc->i4_id]) + { + 
ithread_cond_wait(ps_dec->apv_proc_start_condition[ps_proc->i4_id], + ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); + } + ps_dec->ai4_process_start[ps_proc->i4_id] = 0; + ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); + if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) + break; + + DEBUG(" Got control at ihevcd_process_thread \n"); + + if(ps_dec->i4_break_threads == 1) + break; } - ps_dec->ai4_process_start[ps_proc->i4_id] = 0; - ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[ps_proc->i4_id]); - if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) - break; - - DEBUG(" Got control at ihevcd_process_thread \n"); - - if(ps_dec->i4_break_threads == 1) - break; -#endif while(1) { proc_job_t s_job; @@ -1670,19 +1728,24 @@ void ihevcd_process_thread(process_ctxt_t *ps_proc) s_job.i2_ctb_y << ps_sps->i1_log2_ctb_size, num_rows); } } -#ifdef KEEP_THREADS_ACTIVE - ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); - if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) - break; + if(ps_proc->ps_codec->i4_threads_active) + { + ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); + if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) + break; - ps_dec->ai4_process_done[ps_proc->i4_id] = 1; - ithread_cond_signal(ps_dec->apv_proc_done_condition[ps_proc->i4_id]); + ps_dec->ai4_process_done[ps_proc->i4_id] = 1; + ithread_cond_signal(ps_dec->apv_proc_done_condition[ps_proc->i4_id]); - ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); - if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) + ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[ps_proc->i4_id]); + if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret) + break; + } + else + { break; + } } -#endif //ithread_exit(0); return; } diff --git a/decoder/ihevcd_sao.c b/decoder/ihevcd_sao.c index 243e2ce..2ea935a 100644 --- a/decoder/ihevcd_sao.c +++ b/decoder/ihevcd_sao.c @@ -364,6 +364,9 @@ void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt) } + /* Chroma */ + if(CHROMA_FMT_IDC_MONOCHROME != 
ps_sps->i1_chroma_format_idc) + { if(0 == ps_sao->b3_cb_type_idx) { for(row = 0; row < sao_ht_chroma; row++) @@ -517,7 +520,7 @@ void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt) } } - + } } } @@ -693,6 +696,7 @@ void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) /* Chroma */ + if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) { UWORD32 u4_no_loop_filter_flag; WORD32 loop_filter_bit_pos; @@ -1114,7 +1118,7 @@ void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma); } - if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ps_slice_hdr_top_left->i1_slice_sao_chroma_flag) { if(0 == ps_sao->b3_cb_type_idx) { @@ -1435,7 +1439,7 @@ void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) } } } - else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)) + else if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))) { for(row = 0; row < sao_ht_chroma; row++) { @@ -1761,7 +1765,7 @@ void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) } } - if(0 != sao_wd_chroma) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_wd_chroma) { if(ps_slice_hdr_top->i1_slice_sao_chroma_flag) { @@ -2360,7 +2364,7 @@ void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) } } - if(0 != sao_ht_chroma) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_ht_chroma) { if(ps_slice_hdr_left->i1_slice_sao_chroma_flag) { @@ -2995,7 +2999,7 @@ void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) } } - if((0 != sao_wd_chroma) && (0 != sao_ht_chroma)) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && (0 != sao_wd_chroma) && (0 != sao_ht_chroma)) { if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) { @@ -3415,7 +3419,7 @@ void 
ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt) } /* Chroma */ - if(no_loop_filter_enabled_chroma) + if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && no_loop_filter_enabled_chroma) { UWORD32 u4_no_loop_filter_flag; WORD32 loop_filter_bit_pos; diff --git a/decoder/ihevcd_structs.h b/decoder/ihevcd_structs.h index cbc86af..13e49b4 100644 --- a/decoder/ihevcd_structs.h +++ b/decoder/ihevcd_structs.h @@ -119,6 +119,13 @@ typedef struct cab_ctxt */ UWORD8 au1_ctxt_models_sync[IHEVC_CAB_CTXT_END]; +#ifdef ENABLE_MAIN_REXT_PROFILE + /** golomb rice adaptation statistics */ + WORD32 ai4_rice_stat_coeff[4]; + + WORD32 ai4_rice_stat_coeff_sync[4]; +#endif + }cab_ctxt_t; typedef enum @@ -736,7 +743,7 @@ typedef struct /** * Chroma pred mode index to be used to compute intra pred mode for chroma */ - WORD32 i4_intra_chroma_pred_mode_idx; + WORD32 ai4_intra_chroma_pred_mode_idx[4]; /** * Maximum transform depth */ @@ -751,11 +758,17 @@ typedef struct * Cb CBF */ UWORD8 ai1_cbf_cb[MAX_TRAFO_DEPTH]; +#ifdef ENABLE_MAIN_REXT_PROFILE + UWORD8 ai1_cbf_cb_subtu[MAX_TRAFO_DEPTH]; +#endif /** * Cr CBF */ UWORD8 ai1_cbf_cr[MAX_TRAFO_DEPTH]; +#ifdef ENABLE_MAIN_REXT_PROFILE + UWORD8 ai1_cbf_cr_subtu[MAX_TRAFO_DEPTH]; +#endif /** * Intra split flag @@ -1161,10 +1174,12 @@ typedef struct */ WORD32 i4_next_tu_ctb_cnt; +#ifndef DISABLE_SEI /** * SEI parameters */ sei_params_t s_sei_params; +#endif }parse_ctxt_t; /** @@ -1463,8 +1478,20 @@ typedef struct /** Intermediate buffer to be used during inverse transform */ WORD16 *pi2_itrans_intrmd_buf; +#ifdef ENABLE_MAIN_REXT_PROFILE + /** + * Residue buffer used to store the output of the inverse transform.
+ * Only used for frext tool sets + */ + WORD16 *pi2_res_luma_buf; + WORD16 *pi2_res_chroma_buf; +#endif + /** Buffer to hold output of inverse scan */ WORD16 *pi2_invscan_out; +#ifdef ENABLE_MAIN_REXT_PROFILE + WORD16 *pi2_invscan_out_subtu; +#endif /** * Top availability for current CTB level @@ -1588,6 +1615,14 @@ typedef void (*pf_intra_pred)(UWORD8 *pu1_ref, WORD32 nt, WORD32 mode); +typedef void (*pf_itrans_res)(WORD16 *pi2_src, + WORD16 *pi2_tmp, + WORD16 *pi2_dst, + WORD32 i4_src_strd, + WORD32 i4_dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + typedef void (*pf_itrans_recon)(WORD16 *pi2_src, WORD16 *pi2_tmp, UWORD8 *pu1_pred, @@ -1613,6 +1648,11 @@ typedef void (*pf_itrans_recon_dc)(UWORD8 *pu1_pred, WORD32 log2_trans_size, WORD16 i2_coeff_value); +typedef void (*pf_itrans_res_dc)(WORD16 *pi2_dst, + WORD32 dst_strd, + WORD32 log2_trans_size, + WORD16 i2_coeff_value); + typedef void (*pf_sao_luma)(UWORD8 *, WORD32, @@ -1904,6 +1944,11 @@ struct _codec_t */ UWORD32 u4_bitsbuf_size_dynamic; + /** + * Bitmask specifying the set of supported YUV output formats + */ + UWORD32 u4_enable_yuv_formats; + /** * Pointer to hold TU data for a set of CTBs or a picture */ @@ -2080,7 +2125,6 @@ struct _codec_t */ parse_ctxt_t s_parse; -#ifdef KEEP_THREADS_ACTIVE /** * Condition variable to signal process start */ @@ -2115,7 +2159,6 @@ struct _codec_t * Flag to signal processing thread to exit */ WORD32 i4_break_threads; -#endif /** * Processing context - One for each processing thread @@ -2212,15 +2255,21 @@ struct _codec_t /** Funtion pointers for inter_pred_chroma leaf level functions */ pf_intra_pred apf_intra_pred_chroma[11]; + /** Function pointers for itrans leaf level functions */ + pf_itrans_res apf_itrans_res[5]; + /** Funtion pointers for itrans_recon leaf level functions */ - pf_itrans_recon apf_itrans_recon[8]; + pf_itrans_recon apf_itrans_recon[9]; /** Funtion pointers for recon leaf level functions */ - pf_recon apf_recon[8]; + pf_recon apf_recon[9]; /** 
Funtion pointers for itrans_recon_dc leaf level functions */ pf_itrans_recon_dc apf_itrans_recon_dc[2]; + /** Function pointers for itrans dc leaf level functions */ + pf_itrans_res_dc apf_itrans_res_dc; + /** Funtion pointers for sao_luma leaf level functions */ pf_sao_luma apf_sao_luma[4]; @@ -2239,6 +2288,8 @@ struct _codec_t /** Number of active display buffers - for shared mode */ WORD32 i4_share_disp_buf_cnt; + + WORD32 i4_threads_active; }; #endif /* _IHEVCD_STRUCTS_H_ */ diff --git a/decoder/ihevcd_utils.c b/decoder/ihevcd_utils.c old mode 100755 new mode 100644 index 563d948..98ebd49 --- a/decoder/ihevcd_utils.c +++ b/decoder/ihevcd_utils.c @@ -216,7 +216,22 @@ WORD32 ihevcd_get_total_pic_buf_size(codec_t *ps_codec, num_luma_samples = (wd + PAD_WD) * (ht + PAD_HT); /* Account for chroma */ - num_samples = num_luma_samples * 3 / 2; + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + num_samples = num_luma_samples * 3; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + num_samples = num_luma_samples * 2; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) + { + num_samples = num_luma_samples * 3 / 2; + } + else + { + num_samples = num_luma_samples; + } /* Number of bytes in reference pictures */ size = num_samples * max_dpb_size; @@ -298,17 +313,33 @@ WORD32 ihevcd_get_pic_mv_bank_size(WORD32 num_luma_samples) * ******************************************************************************* */ -WORD32 ihevcd_get_tu_data_size(WORD32 num_luma_samples) +WORD32 ihevcd_get_tu_data_size(codec_t *ps_codec, WORD32 num_luma_samples) { - + sps_t *ps_sps = (ps_codec->s_parse.ps_sps_base + ps_codec->i4_sps_id); WORD32 tu_data_size; WORD32 num_ctb; WORD32 num_luma_tu, num_chroma_tu, num_tu; num_ctb = num_luma_samples / (MIN_CTB_SIZE * MIN_CTB_SIZE); num_luma_tu = num_luma_samples / (MIN_TU_SIZE * MIN_TU_SIZE); - num_chroma_tu = num_luma_tu >> 1; + + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + 
num_chroma_tu = num_luma_tu << 1; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + num_chroma_tu = num_luma_tu; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) + { + num_chroma_tu = num_luma_tu >> 1; + } + else + { + num_chroma_tu = 0; + } num_tu = num_luma_tu + num_chroma_tu; tu_data_size = 0; @@ -459,13 +490,16 @@ IHEVCD_ERROR_T ihevcd_pic_buf_mgr_add_bufs(codec_t *ps_codec) UWORD8 *pu1_buf; pic_buf_t *ps_pic_buf; WORD32 pic_buf_size_allocated; - - + WORD32 h_samp_factor, v_samp_factor; + WORD32 chroma_pixel_strd = 2; /* Initialize Pic buffer manager */ ps_sps = ps_codec->s_parse.ps_sps; + h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; + v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1; + /* Compute the number of Pic buffers needed */ max_dpb_size = ps_sps->ai1_sps_max_dec_pic_buffering[ps_sps->i1_sps_max_sub_layers - 1]; @@ -496,7 +530,14 @@ IHEVCD_ERROR_T ihevcd_pic_buf_mgr_add_bufs(codec_t *ps_codec) luma_samples = (ps_codec->i4_strd) * (ps_sps->i2_pic_height_in_luma_samples + PAD_HT); - chroma_samples = luma_samples / 2; + if(CHROMA_FMT_IDC_MONOCHROME == ps_sps->i1_chroma_format_idc) + { + chroma_samples = 0; + } + else + { + chroma_samples = luma_samples * 2 / (h_samp_factor * v_samp_factor); + } /* Try to add as many buffers as possible since memory is already allocated */ /* If the number of buffers that can be added is less than max_num_bufs @@ -515,8 +556,17 @@ IHEVCD_ERROR_T ihevcd_pic_buf_mgr_add_bufs(codec_t *ps_codec) ps_pic_buf->pu1_luma = pu1_buf + ps_codec->i4_strd * PAD_TOP + PAD_LEFT; pu1_buf += luma_samples; - ps_pic_buf->pu1_chroma = pu1_buf + ps_codec->i4_strd * (PAD_TOP / 2) + PAD_LEFT; - pu1_buf += chroma_samples; + if(chroma_samples) + { + ps_pic_buf->pu1_chroma = pu1_buf + + (ps_codec->i4_strd * chroma_pixel_strd / h_samp_factor) * (PAD_TOP / v_samp_factor) + + (PAD_LEFT * chroma_pixel_strd / h_samp_factor); + pu1_buf += 
chroma_samples; + } + else + { + ps_pic_buf->pu1_chroma = NULL; + } /* Pad boundary pixels (one pixel on all sides) */ /* This ensures SAO does not read uninitialized pixels */ @@ -542,21 +592,25 @@ IHEVCD_ERROR_T ihevcd_pic_buf_mgr_add_bufs(codec_t *ps_codec) pu1_buf += strd * ht; memset(pu1_buf - 1, 0, wd + 2); - pu1_buf = ps_pic_buf->pu1_chroma; - ht >>= 1; - for(i = 0; i < ht; i++) + if(ps_pic_buf->pu1_chroma) { - pu1_buf[-1] = 0; - pu1_buf[-2] = 0; - pu1_buf[wd] = 0; - pu1_buf[wd + 1] = 0; - pu1_buf += strd; - } - pu1_buf = ps_pic_buf->pu1_chroma; - memset(pu1_buf - strd - 2, 0, wd + 4); + pu1_buf = ps_pic_buf->pu1_chroma; + ht /= v_samp_factor; + WORD32 chroma_strd_scale = chroma_pixel_strd / h_samp_factor; + for(i = 0; i < ht; i++) + { + pu1_buf[-1] = 0; + pu1_buf[-2] = 0; + pu1_buf[wd * chroma_strd_scale] = 0; + pu1_buf[wd * chroma_strd_scale + 1] = 0; + pu1_buf += (strd * chroma_strd_scale); + } + pu1_buf = ps_pic_buf->pu1_chroma; + memset(pu1_buf - (strd * chroma_strd_scale) - 2, 0, wd * chroma_strd_scale + 4); - pu1_buf += strd * ht; - memset(pu1_buf - 2, 0, wd + 4); + pu1_buf += (strd * chroma_strd_scale) * ht; + memset(pu1_buf - 2, 0, wd * chroma_strd_scale + 4); + } } buf_ret = ihevc_buf_mgr_add((buf_mgr_t *)ps_codec->pv_pic_buf_mgr, ps_pic_buf, i); @@ -587,7 +641,8 @@ IHEVCD_ERROR_T ihevcd_pic_buf_mgr_add_bufs(codec_t *ps_codec) break; } ps_pic_buf->pu1_luma += ps_codec->i4_strd * PAD_TOP + PAD_LEFT; - ps_pic_buf->pu1_chroma += ps_codec->i4_strd * (PAD_TOP / 2) + PAD_LEFT; + ps_pic_buf->pu1_chroma += (ps_codec->i4_strd * chroma_pixel_strd / h_samp_factor) * (PAD_TOP / v_samp_factor) + + (PAD_LEFT * chroma_pixel_strd / h_samp_factor); } } @@ -741,15 +796,15 @@ IHEVCD_ERROR_T ihevcd_check_out_buf_size(codec_t *ps_codec) if(ps_codec->e_chroma_fmt == IV_YUV_420P) u4_min_num_out_bufs = MIN_OUT_BUFS_420; - else if(ps_codec->e_chroma_fmt == IV_YUV_422ILE) - u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE; - else if(ps_codec->e_chroma_fmt == IV_RGB_565) - 
u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565; - else if(ps_codec->e_chroma_fmt == IV_RGBA_8888) - u4_min_num_out_bufs = MIN_OUT_BUFS_RGBA8888; + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) + u4_min_num_out_bufs = MIN_OUT_BUFS_444; + else if(ps_codec->e_chroma_fmt == IV_YUV_422P) + u4_min_num_out_bufs = MIN_OUT_BUFS_422; else if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV) || (ps_codec->e_chroma_fmt == IV_YUV_420SP_VU)) u4_min_num_out_bufs = MIN_OUT_BUFS_420SP; + else if(ps_codec->e_chroma_fmt == IV_GRAY) + u4_min_num_out_bufs = MIN_OUT_BUFS_GRAY; if(ps_codec->e_chroma_fmt == IV_YUV_420P) { @@ -757,23 +812,11 @@ IHEVCD_ERROR_T ihevcd_check_out_buf_size(codec_t *ps_codec) au4_min_out_buf_size[1] = (wd * ht) >> 2; au4_min_out_buf_size[2] = (wd * ht) >> 2; } - else if(ps_codec->e_chroma_fmt == IV_YUV_422ILE) + else if(ps_codec->e_chroma_fmt == IV_YUV_444P) { - au4_min_out_buf_size[0] = (wd * ht) * 2; - au4_min_out_buf_size[1] = - au4_min_out_buf_size[2] = 0; - } - else if(ps_codec->e_chroma_fmt == IV_RGB_565) - { - au4_min_out_buf_size[0] = (wd * ht) * 2; - au4_min_out_buf_size[1] = - au4_min_out_buf_size[2] = 0; - } - else if(ps_codec->e_chroma_fmt == IV_RGBA_8888) - { - au4_min_out_buf_size[0] = (wd * ht) * 4; - au4_min_out_buf_size[1] = - au4_min_out_buf_size[2] = 0; + au4_min_out_buf_size[0] = (wd * ht); + au4_min_out_buf_size[1] = (wd * ht); + au4_min_out_buf_size[2] = (wd * ht); } else if((ps_codec->e_chroma_fmt == IV_YUV_420SP_UV) || (ps_codec->e_chroma_fmt == IV_YUV_420SP_VU)) @@ -782,6 +825,19 @@ IHEVCD_ERROR_T ihevcd_check_out_buf_size(codec_t *ps_codec) au4_min_out_buf_size[1] = (wd * ht) >> 1; au4_min_out_buf_size[2] = 0; } + else if(ps_codec->e_chroma_fmt == IV_GRAY) + { + au4_min_out_buf_size[0] = (wd * ht); + au4_min_out_buf_size[1] = 0; + au4_min_out_buf_size[2] = 0; + } + else if(ps_codec->e_chroma_fmt == IV_YUV_422P) + { + au4_min_out_buf_size[0] = (wd * ht); + au4_min_out_buf_size[1] = (wd * ht) >> 1; + au4_min_out_buf_size[2] = (wd * ht) >> 1; + } + 
if(ps_out_buffer->u4_num_bufs < u4_min_num_out_bufs) { @@ -830,6 +886,8 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t *ps_codec) pic_buf_t *ps_cur_pic; slice_header_t *ps_slice_hdr; UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma; + WORD32 h_samp_factor, v_samp_factor; + WORD32 chroma_pixel_strd = 2; WORD32 i; ps_codec->s_parse.i4_error_code = IHEVCD_SUCCESS; @@ -841,7 +899,8 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t *ps_codec) memset(ps_codec->s_parse.pu1_pic_intra_flag, 0, num_min_cu); memset(ps_codec->s_parse.pu1_pic_no_loop_filter_flag, 0, num_min_cu); - + h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2; + v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1; if(0 == ps_codec->s_parse.i4_first_pic_init) { @@ -925,9 +984,9 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t *ps_codec) pu1_cur_pic_luma = pu1_buf; pu1_buf = ps_cur_pic->pu1_chroma; - pu1_cur_pic_chroma = pu1_buf; +#ifndef DISABLE_SEI ps_cur_pic->s_sei_params.i1_sei_parameters_present_flag = 0; if(ps_codec->s_parse.s_sei_params.i1_sei_parameters_present_flag) { @@ -948,12 +1007,19 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t *ps_codec) ps_sei->i1_active_parameter_set = 0; ps_sei->i4_sei_mastering_disp_colour_vol_params_present_flags = 0; } +#endif } if(0 == ps_codec->u4_pic_cnt) { memset(ps_cur_pic->pu1_luma, 128, (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * ps_sps->i2_pic_height_in_luma_samples); - memset(ps_cur_pic->pu1_chroma, 128, (ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * ps_sps->i2_pic_height_in_luma_samples / 2); + if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME) + { + memset(ps_cur_pic->pu1_chroma, + 128, + (((ps_sps->i2_pic_width_in_luma_samples + PAD_WD) * (chroma_pixel_strd / h_samp_factor)) + * ps_sps->i2_pic_height_in_luma_samples / v_samp_factor)); + } } /* Fill the remaining entries of the reference lists with the nearest POC @@ -1040,9 +1106,10 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t 
*ps_codec) /* Reset the jobq to start of the jobq buffer */ ihevcd_jobq_reset((jobq_t *)ps_codec->pv_proc_jobq); -#ifdef KEEP_THREADS_ACTIVE - ps_codec->i4_break_threads = 0; -#endif + if(ps_codec->i4_threads_active) + { + ps_codec->i4_break_threads = 0; + } ps_codec->s_parse.i4_pic_pu_idx = 0; ps_codec->s_parse.i4_pic_tu_idx = 0; @@ -1068,7 +1135,22 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t *ps_codec) ctb_luma_min_tu_cnt = pic_size / (MIN_TU_SIZE * MIN_TU_SIZE); - ctb_chroma_min_tu_cnt = ctb_luma_min_tu_cnt >> 1; + if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + ctb_chroma_min_tu_cnt = ctb_luma_min_tu_cnt << 1; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) + { + ctb_chroma_min_tu_cnt = ctb_luma_min_tu_cnt; + } + else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) + { + ctb_chroma_min_tu_cnt = ctb_luma_min_tu_cnt >> 1; + } + else + { + ctb_chroma_min_tu_cnt = 0; + } ctb_min_tu_cnt = ctb_luma_min_tu_cnt + ctb_chroma_min_tu_cnt; @@ -1248,16 +1330,17 @@ IHEVCD_ERROR_T ihevcd_parse_pic_init(codec_t *ps_codec) (void *)&ps_codec->as_process[i]); ps_codec->ai4_process_thread_created[i] = 1; } -#ifdef KEEP_THREADS_ACTIVE - ret = ithread_mutex_lock(ps_codec->apv_proc_start_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + if(ps_codec->i4_threads_active) + { + ret = ithread_mutex_lock(ps_codec->apv_proc_start_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); - ps_codec->ai4_process_start[i] = 1; - ithread_cond_signal(ps_codec->apv_proc_start_condition[i]); + ps_codec->ai4_process_start[i] = 1; + ithread_cond_signal(ps_codec->apv_proc_start_condition[i]); - ret = ithread_mutex_unlock(ps_codec->apv_proc_start_mutex[i]); - RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); -#endif + ret = ithread_mutex_unlock(ps_codec->apv_proc_start_mutex[i]); + RETURN_IF((ret != (IHEVCD_ERROR_T)IHEVCD_SUCCESS), ret); + } } else { diff --git a/decoder/ihevcd_utils.h b/decoder/ihevcd_utils.h 
index 893acaa..9bc1951 100644 --- a/decoder/ihevcd_utils.h +++ b/decoder/ihevcd_utils.h @@ -40,7 +40,7 @@ WORD32 ihevcd_get_lvl_idx(WORD32 level); WORD32 ihevcd_get_dpb_size(WORD32 level, WORD32 pic_size); WORD32 ihevcd_get_pic_mv_bank_size(WORD32 num_luma_samples); -WORD32 ihevcd_get_tu_data_size(WORD32 num_luma_samples); +WORD32 ihevcd_get_tu_data_size(codec_t *ps_codec, WORD32 num_luma_samples); WORD32 ihevcd_nctb_cnt(codec_t *ps_codec, sps_t *ps_sps); WORD32 ihevcd_get_max_luma_samples(WORD32 level); IHEVCD_ERROR_T ihevcd_get_tile_pos(pps_t *ps_pps, diff --git a/decoder/libhevcdec.cmake b/decoder/libhevcdec.cmake old mode 100755 new mode 100644 index 35812d4..1db95d6 --- a/decoder/libhevcdec.cmake +++ b/decoder/libhevcdec.cmake @@ -33,13 +33,12 @@ list( include_directories(${HEVC_ROOT}/decoder) # arm/x86 sources -if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") +if("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "arm64") list( APPEND LIBHEVCDEC_ASMS "${HEVC_ROOT}/decoder/arm64/ihevcd_fmt_conv_420sp_to_420p.s" "${HEVC_ROOT}/decoder/arm64/ihevcd_fmt_conv_420sp_to_420sp.s" - "${HEVC_ROOT}/decoder/arm64/ihevcd_fmt_conv_420sp_to_rgba8888.s" "${HEVC_ROOT}/decoder/arm/ihevcd_function_selector.c" "${HEVC_ROOT}/decoder/arm/ihevcd_function_selector_noneon.c" "${HEVC_ROOT}/decoder/arm64/ihevcd_function_selector_av8.c" @@ -47,13 +46,12 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") "${HEVC_ROOT}/decoder/arm64/ihevcd_itrans_recon_dc_luma.s") include_directories(${HEVC_ROOT}/decoder/arm64) -elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32") +elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32") list( APPEND LIBHEVCDEC_ASMS "${HEVC_ROOT}/decoder/arm/ihevcd_fmt_conv_420sp_to_420p.s" "${HEVC_ROOT}/decoder/arm/ihevcd_fmt_conv_420sp_to_420sp.s" - "${HEVC_ROOT}/decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s" "${HEVC_ROOT}/decoder/arm/ihevcd_function_selector_a9q.c" "${HEVC_ROOT}/decoder/arm/ihevcd_function_selector.c" 
"${HEVC_ROOT}/decoder/arm/ihevcd_function_selector_noneon.c" @@ -78,3 +76,4 @@ endif() add_library(libhevcdec STATIC ${LIBHEVC_COMMON_SRCS} ${LIBHEVC_COMMON_ASMS} ${LIBHEVCDEC_ASMS} ${LIBHEVCDEC_SRCS}) +target_compile_definitions(libhevcdec PRIVATE ENABLE_MAIN_REXT_PROFILE) diff --git a/decoder/riscv64/ihevcd_function_selector.c b/decoder/riscv64/ihevcd_function_selector.c index a2a13ed..066a8ab 100644 --- a/decoder/riscv64/ihevcd_function_selector.c +++ b/decoder/riscv64/ihevcd_function_selector.c @@ -54,9 +54,7 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" void ihevcd_init_function_ptr(void *pv_codec) { @@ -65,7 +63,7 @@ void ihevcd_init_function_ptr(void *pv_codec) { default: case ARCH_RISCV64_GENERIC: - ihevcd_init_function_ptr_generic(pv_codec); + ihevcd_init_function_ptr_generic(&ps_codec->s_func_selector); } } diff --git a/decoder/riscv64/ihevcd_function_selector_generic.c b/decoder/riscv64/ihevcd_function_selector_generic.c index f8b53ad..ff2d237 100644 --- a/decoder/riscv64/ihevcd_function_selector_generic.c +++ b/decoder/riscv64/ihevcd_function_selector_generic.c @@ -54,109 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_generic(void *pv_codec) +void ihevcd_init_function_ptr_generic(func_selector_t *ps_func_selector) { - codec_t *ps_codec = (codec_t *)pv_codec; - - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy; - 
ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = 
&ihevc_intra_pred_ref_filtering; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc; - ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar; - 
ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy; - 
ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset; - ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma; - ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma; - 
ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma; + ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy; + ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz; + 
ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert; + ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out; + ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out; + ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = 
&ihevc_intra_pred_chroma_mode_11_to_17; + ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25; + ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar; + ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = 
&ihevc_itrans_recon_8x8; + ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; + ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = 
&ihevc_weighted_pred_bi_default; + ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi; + ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma; + ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1; + ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/decoder/x86/ihevcd_function_selector.c b/decoder/x86/ihevcd_function_selector.c index b058a62..2ad66be 100644 --- 
a/decoder/x86/ihevcd_function_selector.c +++ b/decoder/x86/ihevcd_function_selector.c @@ -64,23 +64,23 @@ void ihevcd_init_function_ptr(void *pv_codec) switch(ps_codec->e_processor_arch) { case ARCH_X86_GENERIC: - ihevcd_init_function_ptr_generic(pv_codec); + ihevcd_init_function_ptr_generic(&ps_codec->s_func_selector); break; case ARCH_X86_SSSE3: - ihevcd_init_function_ptr_ssse3(pv_codec); + ihevcd_init_function_ptr_ssse3(&ps_codec->s_func_selector); break; case ARCH_X86_SSE42: - ihevcd_init_function_ptr_sse42(pv_codec); + ihevcd_init_function_ptr_sse42(&ps_codec->s_func_selector); break; case ARCH_X86_AVX2: #ifndef DISABLE_AVX2 - ihevcd_init_function_ptr_avx2(pv_codec); + ihevcd_init_function_ptr_avx2(&ps_codec->s_func_selector); #else - ihevcd_init_function_ptr_sse42(pv_codec); + ihevcd_init_function_ptr_sse42(&ps_codec->s_func_selector); #endif break; default: - ihevcd_init_function_ptr_ssse3(pv_codec); + ihevcd_init_function_ptr_ssse3(&ps_codec->s_func_selector); break; } } diff --git a/decoder/x86/ihevcd_function_selector_generic.c b/decoder/x86/ihevcd_function_selector_generic.c index f8b53ad..ff2d237 100644 --- a/decoder/x86/ihevcd_function_selector_generic.c +++ b/decoder/x86/ihevcd_function_selector_generic.c @@ -54,109 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_generic(void *pv_codec) +void ihevcd_init_function_ptr_generic(func_selector_t *ps_func_selector) { - codec_t *ps_codec = (codec_t *)pv_codec; - - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = 
&ihevc_inter_pred_chroma_copy; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - 
ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc; - ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = 
&ihevc_intra_pred_luma_planar; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy; - 
ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset; - ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma; - ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma; - 
ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma; + ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy; + ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz; + 
ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert; + ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out; + ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out; + ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = 
&ihevc_intra_pred_chroma_mode_11_to_17; + ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25; + ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar; + ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = 
&ihevc_itrans_recon_8x8; + ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; + ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = 
&ihevc_weighted_pred_bi_default; + ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi; + ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma; + ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1; + ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/decoder/x86/ihevcd_function_selector_sse42.c b/decoder/x86/ihevcd_function_selector_sse42.c index fe46cc4..432c75c 100644 --- 
a/decoder/x86/ihevcd_function_selector_sse42.c +++ b/decoder/x86/ihevcd_function_selector_sse42.c @@ -54,109 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_sse42(void *pv_codec) +void ihevcd_init_function_ptr_sse42(func_selector_t *ps_func_selector) { - codec_t *ps_codec = (codec_t *)pv_codec; - - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_ssse3; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_ssse3; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_ssse3; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_sse42; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_sse42; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_ssse3; - 
ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_sse42; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_ssse3; - 
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_sse42; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_sse42; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_sse42; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_sse42; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_sse42; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_ssse3; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = 
&ihevc_itrans_recon_32x32_sse42; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy; - ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset; - ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_sse42; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_sse42; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_sse42; - 
ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_sse42; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_ssse3; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_sse42; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_ssse3; - ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_ssse3; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_ssse3; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_sse42; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_sse42; + 
ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_ssse3; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_ssse3; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_ssse3; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_sse42; + ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_sse42; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_ssse3; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_ssse3; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_sse42; + ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_ssse3; + 
ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_sse42; + ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_sse42; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_sse42; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_sse42; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_sse42; + ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_sse42; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_sse42; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = 
&ihevc_intra_pred_luma_mode2_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_sse42; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_sse42; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_ssse3; + ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_sse42; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_sse42; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_sse42; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_sse42; + ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_ssse3; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_sse42; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = 
&ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; + ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_sse42; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_sse42; + ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_sse42; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_sse42; + ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_ssse3; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_sse42; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_ssse3; 
+ ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_ssse3; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_ssse3; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_sse42; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_sse42; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/decoder/x86/ihevcd_function_selector_ssse3.c b/decoder/x86/ihevcd_function_selector_ssse3.c index fdb471a..ebb33a6 100644 --- a/decoder/x86/ihevcd_function_selector_ssse3.c +++ b/decoder/x86/ihevcd_function_selector_ssse3.c @@ -54,109 +54,113 @@ #include "ihevc_dpb_mgr.h" #include "ihevc_error.h" -#include "ihevcd_defs.h" #include "ihevcd_function_selector.h" -#include "ihevcd_structs.h" -void ihevcd_init_function_ptr_ssse3(void *pv_codec) +void ihevcd_init_function_ptr_ssse3(func_selector_t *ps_func_selector) { - codec_t *ps_codec = (codec_t 
*)pv_codec; - - ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_ssse3; - ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_ssse3; - ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_ssse3; - ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_ssse3; - ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_ssse3; - 
ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; - ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_ssse3; - 
ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_ssse3; - ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_ssse3; - ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; - ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; - ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; - ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; - ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_ssse3; - ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_ssse3; - ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_ssse3; - ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_ssse3; - ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_ssse3; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; - ps_codec->s_func_selector.ihevc_recon_4x4_fptr = 
&ihevc_recon_4x4; - ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8; - ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16; - ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32; - ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; - ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; - ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; - ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; - ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy; - ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; - ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset; - ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; - ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit; - ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; - ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; - ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; - ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_ssse3; - ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_ssse3; - ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_ssse3; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_ssse3; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_ssse3; - ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_ssse3; - ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_ssse3; - 
ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_ssse3; - ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_ssse3; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; - ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_ssse3; - ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_ssse3; + ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_ssse3; + ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_ssse3; + ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_ssse3; + ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_ssse3; + 
ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_ssse3; + ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_ssse3; + ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_ssse3; + ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution; + ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble; + ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_ssse3; + 
ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering; + ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_ssse3; + ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_ssse3; + ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_ssse3; + ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_ssse3; + ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_ssse3; + ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_ssse3; + 
ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_ssse3; + ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1; + ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4; + ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8; + ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16; + ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32; + ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1; + ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4; + ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8; + ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16; + ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32; + ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_ssse3; + ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_ssse3; + ps_func_selector->ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_ssse3; + ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_ssse3; + ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_ssse3; + ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4; + ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8; + ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16; + ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32; + ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1; + ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4; + ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8; + ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16; + ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32; + ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4; + 
ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8; + ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16; + ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32; + ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8; + ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy; + ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8; + ps_func_selector->ihevc_memset_fptr = &ihevc_memset; + ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8; + ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit; + ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma; + ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma; + ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma; + ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma; + ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_ssse3; + ps_func_selector->ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_ssse3; + ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_ssse3; + ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_ssse3; + ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_ssse3; + ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_ssse3; + ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_ssse3; + ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_ssse3; + 
ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_ssse3; + ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_ssse3; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp; + ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p; + ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p; + ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_ssse3; + ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_ssse3; + ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc; } diff --git a/encoder/hme_interface.h b/encoder/hme_interface.h index 33c98fa..1481d3c 100644 --- a/encoder/hme_interface.h +++ b/encoder/hme_interface.h @@ -158,8 +158,7 @@ typedef enum ME_MEDIUM_SPEED, ME_HIGH_SPEED, ME_XTREME_SPEED, - ME_XTREME_SPEED_25, - ME_USER_DEFINED + ME_XTREME_SPEED_25 } ME_QUALITY_PRESETS_T; /*****************************************************************************/ diff --git a/encoder/hme_refine.c b/encoder/hme_refine.c index 2932bd1..7a2e980 100644 --- a/encoder/hme_refine.c +++ b/encoder/hme_refine.c @@ -5129,7 +5129,7 @@ void hme_populate_cu_tree( #if ENABLE_CU_TREE_CULLING { - cur_ctb_cu_tree_t *ps_32x32_root; + cur_ctb_cu_tree_t *ps_32x32_root = NULL; switch(e_parent_blk_pos) { @@ -5157,6 +5157,11 @@ void hme_populate_cu_tree( break; } + default: + { + DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos); + break; + } } if(ps_32x32_root->is_node_valid) @@ -5249,6 +5254,11 @@ void hme_populate_cu_tree( break; 
} + default: + { + DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos); + break; + } } switch(e_parent_blk_pos) @@ -5277,6 +5287,11 @@ void hme_populate_cu_tree( break; } + default: + { + DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos); + break; + } } ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos]; diff --git a/encoder/ihevce_api.h b/encoder/ihevce_api.h index 28b2a1f..6a55a1d 100644 --- a/encoder/ihevce_api.h +++ b/encoder/ihevce_api.h @@ -88,7 +88,7 @@ #define IHEVCE_PAYLOAD_TYPE_SHIFT (16) -#define MAX_FRAME_RATE 120.0 +#define MAX_FRAME_RATE 300.0 #define MIN_FRAME_RATE 1.0 /*****************************************************************************/ @@ -682,7 +682,7 @@ typedef struct * parameters of encoder * * these new params can be passed as async commands - * to the enocder by sending a IHEVCE_CMD_CTL_SETPARAMS command + * to the encoder by sending a IHEVCE_CMD_CTL_SETPARAMS command */ typedef struct { @@ -707,7 +707,7 @@ typedef struct * parameters of encoder for dynamic resolution change * * these new params can be passed as synchromous commands - * to the enocder by sending a IHEVCE_SYNCH_API_SET_RES_TAG command + * to the encoder by sending a IHEVCE_SYNCH_API_SET_RES_TAG command */ typedef struct { diff --git a/encoder/ihevce_decomp_pre_intra_pass.c b/encoder/ihevce_decomp_pre_intra_pass.c index 065fcbb..e272f82 100644 --- a/encoder/ihevce_decomp_pre_intra_pass.c +++ b/encoder/ihevce_decomp_pre_intra_pass.c @@ -859,7 +859,7 @@ WORD32 ihevce_cu_level_qp_mod( *pi4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR); if(cu_satd != -1 && (WORD32)frm_avg_activity != 0) { - ULWORD64 sq_cur_satd = (cu_satd * cu_satd); + ULWORD64 sq_cur_satd = ((ULWORD64)cu_satd * (ULWORD64)cu_satd); float log2_sq_cur_satd = fast_log2(1 + sq_cur_satd); WORD32 qp_offset = f_mod_strength * (log2_sq_cur_satd - frm_avg_activity); diff --git a/encoder/ihevce_enc_loop_inter_mode_sifter.c b/encoder/ihevce_enc_loop_inter_mode_sifter.c index 
6a05cda..46fefcf 100644 --- a/encoder/ihevce_enc_loop_inter_mode_sifter.c +++ b/encoder/ihevce_enc_loop_inter_mode_sifter.c @@ -426,8 +426,8 @@ static WORD8 ihevce_merge_cand_pred_buffer_preparation( WORD32 i4_part_wd_pu2; WORD32 i4_part_ht_pu2; WORD32 i4_buf_offset; - UWORD8 *pu1_pred_src; - UWORD8 *pu1_pred_dst; + UWORD8 *pu1_pred_src = NULL; + UWORD8 *pu1_pred_dst = NULL; WORD8 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0]; WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel; @@ -557,6 +557,11 @@ static WORD8 ihevce_merge_cand_pred_buffer_preparation( break; } + default: + { + DBG_PRINTF("Invalid partition type %d\n", u1_part_type); + break; + } } pf_copy_2d( @@ -591,7 +596,7 @@ static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation( WORD32 i4_part_ht; WORD32 i4_part_wd_pu2; WORD32 i4_part_ht_pu2; - UWORD8 *pu1_pred_src; + UWORD8 *pu1_pred_src = NULL; UWORD8 *pu1_pred_dst = NULL; WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]; @@ -669,6 +674,11 @@ static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation( break; } + default: + { + DBG_PRINTF("Invalid partition type %d\n", u1_part_type); + break; + } } ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval]; @@ -910,7 +920,7 @@ static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation( WORD32 i4_part_wd_pu2; WORD32 i4_part_ht_pu2; WORD32 i4_buf_offset; - UWORD8 *pu1_pred_src; + UWORD8 *pu1_pred_src = NULL; UWORD8 *pu1_pred_dst = NULL; WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]; @@ -981,6 +991,11 @@ static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation( break; } + default: + { + DBG_PRINTF("Invalid partition type %d\n", u1_part_type); + break; + } } ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval]; diff --git a/encoder/ihevce_enc_loop_utils.c b/encoder/ihevce_enc_loop_utils.c index 4665cd0..6da2c97 100644 --- a/encoder/ihevce_enc_loop_utils.c +++ b/encoder/ihevce_enc_loop_utils.c @@ -6244,7 +6244,8 @@ 
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector( u1_trans_size, nbr_flags, pu1_ref_sub_out, - 1); + 1, + CHROMA_FMT_IDC_YUV420); /* use the look up to get the function idx */ chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode]; @@ -6636,7 +6637,8 @@ void ihevce_intra_chroma_pred_mode_selector( trans_size, nbr_flags, (UWORD8 *)ps_ctxt->pv_ref_sub_out, - 1); + 1, + CHROMA_FMT_IDC_YUV420); /* use the look up to get the function idx */ chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode]; @@ -7396,7 +7398,8 @@ LWORD64 ihevce_chroma_cu_prcs_rdopt( trans_size, nbr_flags, (UWORD8 *)ps_ctxt->pv_ref_sub_out, - 1); + 1, + CHROMA_FMT_IDC_YUV420); /* use the look up to get the function idx */ chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode]; @@ -9418,7 +9421,8 @@ void ihevce_final_rdopt_mode_prcs( chroma_trans_size, nbr_flags, (UWORD8 *)ps_ctxt->pv_ref_sub_out, - 1); + 1, + CHROMA_FMT_IDC_YUV420); /* use the look up to get the function idx */ chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode]; diff --git a/encoder/ihevce_enc_structs.h b/encoder/ihevce_enc_structs.h index ac39673..bc872b5 100644 --- a/encoder/ihevce_enc_structs.h +++ b/encoder/ihevce_enc_structs.h @@ -1589,8 +1589,10 @@ typedef struct /** Buffer pointer for CTB level information in pre intra pass*/ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1; +#ifndef DISABLE_SEI /** vps parameters activated by current slice */ sei_params_t s_sei; +#endif /** nal_type for the slice to be encoded */ WORD32 i4_slice_nal_type; @@ -1826,6 +1828,7 @@ typedef struct ULWORD64 i8_frame_inter_cost; } s_pic_level_acc_info_t; +#ifndef DISABLE_SEI typedef struct { UWORD32 u4_target_bit_rate_sei_entropy; @@ -1833,6 +1836,7 @@ typedef struct UWORD32 u4_dbf_entropy; } s_pic_level_sei_info_t; +#endif /** ****************************************************************************** * @brief ME pass and Main enocde pass shared variables and buffers @@ -1912,12 +1916,14 @@ typedef struct } me_enc_rdopt_ctxt_t; +#ifndef DISABLE_SEI typedef struct { 
UWORD32 u4_payload_type; UWORD32 u4_payload_length; UWORD8 *pu1_sei_payload; } sei_payload_t; +#endif typedef struct { @@ -1954,8 +1960,10 @@ typedef struct /** vps parameters activated by current slice */ vps_t *ps_vps; +#ifndef DISABLE_SEI /** vps parameters activated by current slice */ sei_params_t s_sei; +#endif /* Flag to indicate if AUD NAL is present */ WORD8 i1_aud_present_flag; @@ -2019,9 +2027,11 @@ typedef struct WORD32 i4_is_end_of_idr_gop; +#ifndef DISABLE_SEI sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD]; UWORD32 u4_num_sei_payload; +#endif /* Flag used only in mres single output case to flush out one res and start with next */ WORD32 i4_out_flush_flag; diff --git a/encoder/ihevce_encode_header_sei_vui.c b/encoder/ihevce_encode_header_sei_vui.c index 68cef99..f3de07e 100644 --- a/encoder/ihevce_encode_header_sei_vui.c +++ b/encoder/ihevce_encode_header_sei_vui.c @@ -595,6 +595,7 @@ WORD32 ihevce_generate_vui(bitstrm_t *ps_bitstrm, sps_t *ps_sps, vui_t s_vui) return return_status; } +#ifndef DISABLE_SEI /** ****************************************************************************** * @@ -1608,7 +1609,7 @@ WORD32 ihevce_generate_sei( /*************************************************************************************************/ /* NOTE: Need to terminate and start new SEI message after active parameter set SEI */ /* Buffering period/pic timing SEI refering to active SPS cannot be embedded in same SEI message */ - /* This is because SPS is activated in HM deocder after completely parsing full SEI message. */ + /* This is because SPS is activated in HM decoder after completely parsing full SEI message. 
*/ /*************************************************************************************************/ if(1) /* Insert New SEI for buffering period after active parameter set SEI */ { @@ -2470,6 +2471,7 @@ WORD32 ihevce_populate_hash_sei( return IHEVCE_SUCCESS; } +#endif /** ****************************************************************************** diff --git a/encoder/ihevce_encode_header_sei_vui.h b/encoder/ihevce_encode_header_sei_vui.h index cb69456..df21582 100644 --- a/encoder/ihevce_encode_header_sei_vui.h +++ b/encoder/ihevce_encode_header_sei_vui.h @@ -40,6 +40,7 @@ /* Function Macros */ /*****************************************************************************/ +#ifndef DISABLE_SEI /** ****************************************************************************** * @brief Macro to calculate the CRC for a bit index @@ -93,17 +94,6 @@ typedef enum /* Extern Function Declarations */ /*****************************************************************************/ -WORD32 ihevce_generate_sub_layer_hrd_params( - bitstrm_t *ps_bitstrm, - sub_lyr_hrd_params_t *ps_sub_lyr_hrd_params, - hrd_params_t *ps_hrd_params, - WORD32 cpb_cnt_minus1); - -WORD32 - ihevce_generate_hrd_params(bitstrm_t *ps_bitstrm, hrd_params_t *ps_hrd_params, sps_t *ps_sps); - -WORD32 ihevce_generate_vui(bitstrm_t *ps_bitstrm, sps_t *ps_sps, vui_t s_vui); - WORD32 ihevce_put_buf_period_sei_params( buf_period_sei_params_t *ps_bp_sei, vui_t *ps_vui_params, bitstrm_t *ps_bitstrm); @@ -163,6 +153,18 @@ WORD32 ihevce_populate_hash_sei( WORD32 uv_strd, WORD32 i4_frame_pos_x, WORD32 i4_frame_pos_y); +#endif + +WORD32 ihevce_generate_sub_layer_hrd_params( + bitstrm_t *ps_bitstrm, + sub_lyr_hrd_params_t *ps_sub_lyr_hrd_params, + hrd_params_t *ps_hrd_params, + WORD32 cpb_cnt_minus1); + +WORD32 + ihevce_generate_hrd_params(bitstrm_t *ps_bitstrm, hrd_params_t *ps_hrd_params, sps_t *ps_sps); + +WORD32 ihevce_generate_vui(bitstrm_t *ps_bitstrm, sps_t *ps_sps, vui_t s_vui); WORD32 ihevce_populate_vui( 
vui_t *ps_vui, diff --git a/encoder/ihevce_entropy_cod.c b/encoder/ihevce_entropy_cod.c index be79699..4605685 100644 --- a/encoder/ihevce_entropy_cod.c +++ b/encoder/ihevce_entropy_cod.c @@ -282,6 +282,7 @@ WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt) PROFILE_START( &ps_hle_ctxt->profile_entropy[ps_enc_ctxt->i4_resolution_id][i4_bitrate_instance_num]); +#ifndef DISABLE_SEI /* Content Light Level Information */ { ps_curr_inp->s_sei.i1_sei_cll_enable = @@ -291,6 +292,7 @@ WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt) ps_curr_inp->s_sei.s_cll_info_sei_params.u2_sei_avg_cll = ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.u2_sei_avg_cll; } +#endif if((NULL != ps_curr_out) && (NULL != ps_curr_inp)) { @@ -370,6 +372,7 @@ WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt) if(1 == ps_curr_inp->i4_frm_proc_valid_flag) { +#ifndef DISABLE_SEI /* --- Init of buffering period and pic timing SEI related params ----*/ { UWORD32 i4_dbf, i4_buffersize, i4_trgt_bit_rate; @@ -414,10 +417,12 @@ WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt) ps_curr_inp->ps_sps->ai1_sps_max_num_reorder_pics[0] + ps_curr_inp->i4_display_num - u4_encode_frm_num; } +#endif /* call the core entropy coding entry point function */ entropy_error = ihevce_entropy_encode_frame( pv_entropy_hdl, ps_curr_out, ps_curr_inp, ps_curr_out->i4_bitstream_buf_size); +#ifndef DISABLE_SEI /* ----------------- Derivation of u4_au_cpb_removal_delay_minus1 --------------------------------*/ if(ps_curr_inp->s_sei.i1_buf_period_params_present_flag) { @@ -438,6 +443,7 @@ WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt) u4_au_cpb_removal_delay_minus1 = (u4_au_cpb_removal_delay_minus1 + 1) & u4_max_cpb_removal_delay_val; } +#endif /* Debug prints for entropy error */ if(entropy_error) { @@ -449,13 +455,20 @@ WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt) /* acquire mutex lock for rate control calls */ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl); + UWORD32 
removal_delay_minus1; +#ifndef DISABLE_SEI + removal_delay_minus1 = + ps_curr_inp->s_sei.s_pic_timing_sei_params.u4_au_cpb_removal_delay_minus1; +#else + removal_delay_minus1 = 0; +#endif /* get frame rate/bit rate/max buffer size */ ihevce_vbv_compliance_frame_level_update( ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bitrate_instance_num], (ps_curr_out->i4_bytes_generated << 3), i4_resolution_id, i4_bitrate_instance_num, - ps_curr_inp->s_sei.s_pic_timing_sei_params.u4_au_cpb_removal_delay_minus1); + removal_delay_minus1); /* release mutex lock after rate control calls */ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl); } diff --git a/encoder/ihevce_entropy_interface.c b/encoder/ihevce_entropy_interface.c index 729c8e4..6014f8d 100644 --- a/encoder/ihevce_entropy_interface.c +++ b/encoder/ihevce_entropy_interface.c @@ -322,7 +322,9 @@ WORD32 ihevce_entropy_encode_frame( vps_t *ps_vps = ps_curr_inp->ps_vps; sps_t *ps_sps = ps_curr_inp->ps_sps; pps_t *ps_pps = ps_curr_inp->ps_pps; +#ifndef DISABLE_SEI sei_params_t *ps_sei = &ps_curr_inp->s_sei; +#endif ihevce_tile_params_t *ps_tile_params_base; WORD32 out_buf_size = i4_out_buf_size; @@ -355,7 +357,9 @@ WORD32 ihevce_entropy_encode_frame( ps_entropy_ctxt->ps_vps = ps_vps; ps_entropy_ctxt->ps_sps = ps_sps; ps_entropy_ctxt->ps_pps = ps_pps; +#ifndef DISABLE_SEI ps_entropy_ctxt->ps_sei = ps_sei; +#endif ps_entropy_ctxt->ps_slice_hdr = &ps_curr_inp->s_slice_hdr; ps_entropy_ctxt->i4_is_cu_cbf_zero = 1; @@ -413,6 +417,7 @@ WORD32 ihevce_entropy_encode_frame( ret |= ihevce_generate_pps(ps_bitstrm, ps_entropy_ctxt->ps_pps); } +#ifndef DISABLE_SEI /* generate sei */ if(1 == ps_entropy_ctxt->ps_sei->i1_sei_parameters_present_flag) { @@ -435,6 +440,7 @@ WORD32 ihevce_entropy_encode_frame( &ps_curr_inp->as_sei_payload[0]); } } +#endif /*PIC INFO: Populate slice header bits */ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_slice_header += @@ -674,6 +680,7 @@ WORD32 ihevce_entropy_encode_frame( } } +#ifndef DISABLE_SEI /* 
generate suffix sei */ if(1 == ps_entropy_ctxt->ps_sei->i1_sei_parameters_present_flag) { @@ -693,6 +700,7 @@ WORD32 ihevce_entropy_encode_frame( /* Updating bytes generated */ ps_curr_out->i4_bytes_generated += ps_bitstrm->u4_strm_buf_offset; } +#endif /* generate end of sequence nal */ if((1 == ps_curr_inp->i1_eos_present_flag) && (ps_curr_inp->i4_is_end_of_idr_gop == 1)) diff --git a/encoder/ihevce_entropy_structs.h b/encoder/ihevce_entropy_structs.h index 5e69c25..ad880c2 100644 --- a/encoder/ihevce_entropy_structs.h +++ b/encoder/ihevce_entropy_structs.h @@ -213,8 +213,10 @@ typedef struct entropy_context /** pointer to current pps parameters */ pps_t *ps_pps; +#ifndef DISABLE_SEI /** pointer to current sei parameters */ sei_params_t *ps_sei; +#endif /** pointer to current slice header parameters */ slice_header_t *ps_slice_hdr; diff --git a/encoder/ihevce_error_check.c b/encoder/ihevce_error_check.c index 9295b29..fbdfd9e 100644 --- a/encoder/ihevce_error_check.c +++ b/encoder/ihevce_error_check.c @@ -350,6 +350,7 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_interop_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_interop_flags); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sps_at_cdr_enable %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sps_at_cdr_enable); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_vui_enable %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable); +#ifndef DISABLE_SEI PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_enable_flag %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_payload_enable_flag %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_payload_enable_flag); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_buffer_period_flags %d \n", 
ps_static_cfg_prms->s_out_strm_prms.i4_sei_buffer_period_flags); @@ -368,6 +369,7 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u4_max_display_mastering_luminance %d \n", ps_static_cfg_prms->s_out_strm_prms.u4_max_display_mastering_luminance); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u4_min_display_mastering_luminance %d \n", ps_static_cfg_prms->s_out_strm_prms.u4_min_display_mastering_luminance); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_hash_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag); +#endif PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_app_tile_params\n"); PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_tiles_enabled_flag %d \n", ps_static_cfg_prms->s_app_tile_params.i4_tiles_enabled_flag); @@ -644,6 +646,7 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c return IHEVCE_SETUNSUPPORTEDINPUT(error_code); } +#ifndef DISABLE_SEI if((ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag > 1) || (ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag < 0)) { @@ -661,6 +664,7 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c pv_cb_handle, "IHEVCE ERROR: i4_sei_payload_enable_flag should be set to 1 or 0 \n"); return IHEVCE_SETUNSUPPORTEDINPUT(error_code); } +#endif if((ps_static_cfg_prms->s_multi_thrd_prms.i4_max_num_cores > MAX_NUM_CORES) || (ps_static_cfg_prms->s_multi_thrd_prms.i4_max_num_cores < 1)) { @@ -708,8 +712,6 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c } { - WORD32 sub_gop_size = (1 << ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers) - << ps_static_cfg_prms->s_src_prms.i4_field_pic; WORD32 i4_max_idr_period, i4_min_idr_period, i4_max_cra_period, i4_max_i_period; WORD32 
i4_max_i_distance; WORD32 i4_min_i_distance = 0, i4_non_zero_idr_period = 0x7FFFFFFF, @@ -719,6 +721,12 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c i4_max_cra_period = ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period; i4_max_i_period = ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period; i4_max_i_distance = MAX(MAX(i4_max_idr_period, i4_max_cra_period), i4_max_i_period); + WORD32 num_b_frms = + (1 << ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1; + if (i4_max_i_distance <= num_b_frms) + ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers = 0; + WORD32 sub_gop_size = (1 << ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers) + << ps_static_cfg_prms->s_src_prms.i4_field_pic; if(sub_gop_size > 1) { @@ -1340,6 +1348,7 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c } } +#ifndef DISABLE_SEI /* Check SEI related error checks */ if(1 == ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag) { @@ -1450,6 +1459,7 @@ WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_c } } } +#endif if(1 == ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable) { diff --git a/encoder/ihevce_error_codes.h b/encoder/ihevce_error_codes.h index 4bdd274..9f377a4 100644 --- a/encoder/ihevce_error_codes.h +++ b/encoder/ihevce_error_codes.h @@ -94,7 +94,9 @@ typedef enum IHEVCE_VUI_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x09, +#ifndef DISABLE_SEI IHEVCE_SEI_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x0A, +#endif IHEVCE_SPS_AT_CDR_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x0B, @@ -288,13 +290,17 @@ typedef enum IHEVCE_INVALID_CORE_CONFIG = IHEVCE_API_ERROR_START + 0x67, +#ifndef DISABLE_SEI IHEVCE_SEI_MESSAGES_DEPENDENCY = IHEVCE_API_ERROR_START + 0x68, +#endif IHEVCE_VUI_DEPENDENCY = IHEVCE_API_ERROR_START + 0x69, +#ifndef DISABLE_SEI IHEVCE_SEI_ENABLED_VUI_DISABLED = IHEVCE_API_ERROR_START + 0x6A, 
IHEVCE_SEI_HASH_VALUE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x6B, +#endif /* Level related error codes */ IHEVCE_PIC_SIZE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x6C, @@ -385,7 +391,9 @@ typedef enum IHEVCE_ARCHITECTURE_TYPE_UNSUPPORTED = IHEVCE_API_ERROR_START + 0x95, +#ifndef DISABLE_SEI IHEVCE_SEI_PAYLOAD_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x96, +#endif IHEVCE_BAD_DIST_CFG_PARAMETERS = IHEVCE_API_ERROR_START + 0x97, @@ -393,7 +401,9 @@ typedef enum IHEVCE_INVALID_MRES_SINGLE_OUT = IHEVCE_API_ERROR_START + 0x99, +#ifndef DISABLE_SEI IHEVCE_SEI_CLL_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x9A, +#endif /** max failure error code to ensure enum is 32 bits wide */ IHEVCE_FAIL = 0xFFFFFFFF diff --git a/encoder/ihevce_frame_process.c b/encoder/ihevce_frame_process.c index ea4952c..5fdc3d7 100644 --- a/encoder/ihevce_frame_process.c +++ b/encoder/ihevce_frame_process.c @@ -3097,12 +3097,15 @@ WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt) ps_curr_inp->s_lap_out.i4_is_ref_pic; { - WORD32 sei_hash_enabled = - (ps_enc_ctxt->ps_stat_prms->s_out_strm_prms + WORD32 sei_hash_enabled; +#ifndef DISABLE_SEI + sei_hash_enabled = (ps_enc_ctxt->ps_stat_prms->s_out_strm_prms .i4_sei_enable_flag == 1) && (ps_enc_ctxt->ps_stat_prms->s_out_strm_prms .i4_decoded_pic_hash_sei_flag != 0); - +#else + sei_hash_enabled = 0; +#endif /* Deblock a picture for all reference frames unconditionally. 
*/ /* Deblock non ref if psnr compute or save recon is enabled */ ps_frm_recon->i4_deblk_pad_hpel_cur_pic = @@ -3508,6 +3511,7 @@ WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt) ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]; //ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_enc_order_num = ps_curr_inp->s_lap_out.i4_enc_order_num; /*registered User Data Call*/ +#ifndef DISABLE_SEI if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_payload_enable_flag) { ihevce_fill_sei_payload( @@ -3516,6 +3520,7 @@ WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt) ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]); } +#endif /*derive end flag and input valid flag in output buffer */ if(NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]) { @@ -3568,12 +3573,14 @@ WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt) ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->ps_vps = &ps_enc_ctxt->as_vps[i4_bitrate_ctr]; +#ifndef DISABLE_SEI /* SEI header will be populated in pre-enocde stage */ memcpy( &ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->s_sei, &ps_curr_inp_from_me->s_sei, sizeof(sei_params_t)); +#endif /*AUD and EOS presnt flags are populated*/ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i1_aud_present_flag = ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_aud_enable_flags; @@ -4168,6 +4175,7 @@ WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt) if(1 == ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] ->i4_frm_proc_valid_flag) { +#ifndef DISABLE_SEI /* Calculate the SEI Hash if enabled */ if(0 != ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i] @@ -4198,6 +4206,7 @@ WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt) 0, 0); } +#endif /* Sending qp, poc and pic-type to entropy thread for printing on console */ if(ps_enc_ctxt->ps_stat_prms->i4_log_dump_level != 0) { @@ -5259,6 +5268,7 @@ void ihevce_pre_enc_init( ps_curr_out->ps_vps = &ps_enc_ctxt->as_vps[0]; } +#ifndef 
DISABLE_SEI /* By default, Sei messages are set to 0, to avoid unintialised memory access */ memset(&ps_curr_out->s_sei, 0, sizeof(sei_params_t)); @@ -5340,6 +5350,7 @@ void ihevce_pre_enc_init( ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag; } } +#endif /* For interlace pictures, first_field depends on topfield_first and bottom field */ if(i4_field_pic) diff --git a/encoder/ihevce_frame_process_utils.c b/encoder/ihevce_frame_process_utils.c index ee7eb49..1e867c6 100644 --- a/encoder/ihevce_frame_process_utils.c +++ b/encoder/ihevce_frame_process_utils.c @@ -308,6 +308,7 @@ unsigned int calc_block_ssim( } } +#ifndef DISABLE_SEI /*! ****************************************************************************** * \if Function name : ihevce_fill_sei_payload \endif @@ -392,6 +393,7 @@ void ihevce_fill_sei_payload( pu4_tag += 2; } } +#endif /*! ****************************************************************************** diff --git a/encoder/ihevce_frame_process_utils.h b/encoder/ihevce_frame_process_utils.h index fef5b44..1f89b0c 100644 --- a/encoder/ihevce_frame_process_utils.h +++ b/encoder/ihevce_frame_process_utils.h @@ -76,10 +76,12 @@ WORD32 ihevce_get_cur_frame_qp( WORD32 max_qp, rc_quant_t *ps_rc_quant_ctxt); +#ifndef DISABLE_SEI void ihevce_fill_sei_payload( enc_ctxt_t *ps_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, frm_proc_ent_cod_ctxt_t *ps_curr_out); +#endif void ihevce_dyn_bitrate(void *pv_hle_ctxt, void *pv_dyn_bitrate_prms); diff --git a/encoder/ihevce_hle_interface.c b/encoder/ihevce_hle_interface.c index 4c1af64..5f6065c 100644 --- a/encoder/ihevce_hle_interface.c +++ b/encoder/ihevce_hle_interface.c @@ -23,7 +23,7 @@ * \file ihevce_hle_interface.c * * \brief -* This file contains all the functions related High level enocder +* This file contains all the functions related High level encoder * Interface layer * * \date @@ -163,7 +163,7 @@ void ihevce_context_reset(enc_ctxt_t *ps_enc_ctxt) * \brief * High level Encoder create 
function * -* \param[in] High level enocder interface context pointer +* \param[in] High level encoder interface context pointer * * \return * success or fail @@ -515,11 +515,16 @@ IV_API_CALL_STATUS_T ihevce_query_io_buf_req( ps_input_bufs_req->i4_yuv_format = ps_src_prms->i4_chr_format; +#ifndef DISABLE_SEI ps_input_bufs_req->i4_min_size_synch_ctrl_bufs = ((MAX_SEI_PAYLOAD_PER_TLV + 16) * MAX_NUMBER_OF_SEI_PAYLOAD) + 16; ps_input_bufs_req->i4_min_size_asynch_ctrl_bufs = ((MAX_SEI_PAYLOAD_PER_TLV + 16) * (MAX_NUMBER_OF_SEI_PAYLOAD - 6)) + 16; +#else + ps_input_bufs_req->i4_min_size_synch_ctrl_bufs = 16; + ps_input_bufs_req->i4_min_size_asynch_ctrl_bufs = 16; +#endif for(i4_resolution_id_ctr = 0; i4_resolution_id_ctr < i4_num_resolutions; i4_resolution_id_ctr++) { diff --git a/encoder/ihevce_hle_interface.h b/encoder/ihevce_hle_interface.h index 2bdf3c1..5c0c898 100644 --- a/encoder/ihevce_hle_interface.h +++ b/encoder/ihevce_hle_interface.h @@ -22,7 +22,7 @@ * \file ihevce_hle_interface.h * * \brief -* This file contains infertace prototypes of High level enocder interafce +* This file contains infertace prototypes of High level encoder interafce * structure and interface functions. 
* * \date diff --git a/encoder/ihevce_lap_interface.c b/encoder/ihevce_lap_interface.c index 734c609..b441c47 100644 --- a/encoder/ihevce_lap_interface.c +++ b/encoder/ihevce_lap_interface.c @@ -548,7 +548,9 @@ void ihevce_lap_parse_sync_cmd( WORD32 *pi4_tag_parse = pi4_cmd_buf; WORD32 i4_cmd_size = ps_lap_inp_buf->s_input_buf.i4_cmd_buf_size; WORD32 i4_buf_id = ps_lap_inp_buf->s_input_buf.i4_buf_id; +#ifndef DISABLE_SEI UWORD32 u4_num_sei = 0; +#endif WORD32 i4_end_flag = 0; while(i4_cmd_size >= 4) @@ -568,7 +570,9 @@ void ihevce_lap_parse_sync_cmd( (*pi4_flush_check) = 1; pi4_tag_parse += 2; i4_cmd_size -= 8; +#ifndef DISABLE_SEI u4_num_sei++; +#endif break; case IHEVCE_SYNCH_API_FORCE_IDR_TAG: if(i4_cmd_size < 8 || pi4_tag_parse[1]) @@ -583,7 +587,9 @@ void ihevce_lap_parse_sync_cmd( (*pi4_force_idr_check) = 1; pi4_tag_parse += 2; i4_cmd_size -= 8; +#ifndef DISABLE_SEI u4_num_sei++; +#endif break; case IHEVCE_SYNCH_API_END_TAG: i4_end_flag = 1; @@ -597,9 +603,11 @@ void ihevce_lap_parse_sync_cmd( if(i4_end_flag) break; } +#ifndef DISABLE_SEI if(u4_num_sei > MAX_NUMBER_OF_SEI_PAYLOAD) //Checking for max number of SEI messages. 
ps_hle_ctxt->ihevce_cmds_error_report( ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_SYNCH_ERR_TOO_MANY_SEI_MSG, 1, i4_buf_id); +#endif if(!i4_end_flag) ps_hle_ctxt->ihevce_cmds_error_report( diff --git a/encoder/ihevce_memory_init.c b/encoder/ihevce_memory_init.c index 479ab3b..74d13eb 100644 --- a/encoder/ihevce_memory_init.c +++ b/encoder/ihevce_memory_init.c @@ -1340,6 +1340,7 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr total_memtabs_used++; total_system_memtabs++; +#ifndef DISABLE_SEI /* SEI Payload Data */ buf_size = sizeof(UWORD8) * MAX_NUMBER_OF_SEI_PAYLOAD * MAX_SEI_PAYLOAD_PER_TLV * NUM_FRMPROC_ENTCOD_BUFS; @@ -1352,6 +1353,7 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr /* increment the memtab counter */ total_memtabs_used++; total_system_memtabs++; +#endif } /* ------ Working mem frame level -------*/ @@ -1515,8 +1517,6 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr ps_intrf_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY; return; } - - memset(pu1_mem, 0, ps_memtab[ctr].i4_mem_size); } /* --------------------------------------------------------------------- */ @@ -2112,7 +2112,6 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr UWORD8 *pu1_coeffs; WORD32 num_ctb_in_frm; WORD32 coeff_size; - UWORD8 *pu1_sei_payload; /* frame process/entropy coding buffer pointer array */ pps_frm_proc_ent_cod_bufs[i] = (frm_proc_ent_cod_ctxt_t **)ps_memtab->pv_base; @@ -2149,9 +2148,12 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr /* increment the memtabs */ ps_memtab++; +#ifndef DISABLE_SEI /* CC User Data */ + UWORD8 *pu1_sei_payload; pu1_sei_payload = (UWORD8 *)ps_memtab->pv_base; ps_memtab++; +#endif num_ctb_in_frm = num_ctb_horz * num_ctb_vert; @@ -2164,7 +2166,6 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr /* loop to initialise all the memories 
*/ for(ctr = 0; ctr < NUM_FRMPROC_ENTCOD_BUFS; ctr++) { - WORD32 num_sei; pps_frm_proc_ent_cod_bufs[i][ctr] = ps_frmp_ent_bufs; ps_frmp_ent_bufs->ps_frm_ctb_data = ps_ctb; @@ -2186,7 +2187,8 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr pu1_coeffs += coeff_size; - for(num_sei = 0; num_sei < MAX_NUMBER_OF_SEI_PAYLOAD; num_sei++) +#ifndef DISABLE_SEI + for(WORD32 num_sei = 0; num_sei < MAX_NUMBER_OF_SEI_PAYLOAD; num_sei++) { ps_frmp_ent_bufs->as_sei_payload[num_sei].pu1_sei_payload = pu1_sei_payload; ps_frmp_ent_bufs->as_sei_payload[num_sei].u4_payload_type = 0; @@ -2194,6 +2196,7 @@ void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intr pu1_sei_payload += MAX_SEI_PAYLOAD_PER_TLV; } +#endif ps_frmp_ent_bufs++; } } diff --git a/encoder/ihevce_plugin.c b/encoder/ihevce_plugin.c index 9dbf376..5013927 100644 --- a/encoder/ihevce_plugin.c +++ b/encoder/ihevce_plugin.c @@ -112,6 +112,8 @@ * * \brief * Memory manager specific alloc function +* it expects to reset the allocated memory and provide the zero initialised +* memory whenever this function getting called * * \param[in] pv_handle : handle to memory manager * (currently not required can be set to null) @@ -158,6 +160,10 @@ void mem_mngr_alloc(void *pv_handle, ihevce_sys_api_t *ps_sys_api, iv_mem_rec_t ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Unable to allocate memory\n"); ASSERT(0); } + else + { + memset(ps_memtab->pv_base, 0, ps_memtab->i4_mem_size); + } return; } @@ -167,6 +173,8 @@ void mem_mngr_alloc(void *pv_handle, ihevce_sys_api_t *ps_sys_api, iv_mem_rec_t * * \brief * common memory allocate function should be used across all threads +* it expects to reset the allocated memory and return the zero initialised +* memory pointer whenever this function getting called * * \param[in] pv_handle : handle to memory manager * (currently not required can be set to null) @@ -183,7 +191,12 @@ void mem_mngr_alloc(void *pv_handle, ihevce_sys_api_t *ps_sys_api, 
iv_mem_rec_t void *memory_alloc(void *pv_handle, UWORD32 u4_size) { (void)pv_handle; - return (malloc(u4_size)); + void *pv_buf = malloc(u4_size); + if(pv_buf) + { + memset(pv_buf, 0, u4_size); + } + return (pv_buf); } /*! @@ -341,6 +354,7 @@ IHEVCE_PLUGIN_STATUS_T ihevce_set_def_params(ihevce_static_cfg_params_t *ps_para ps_params->s_out_strm_prms.i4_codec_profile = 1; ps_params->s_out_strm_prms.i4_codec_tier = 0; ps_params->s_out_strm_prms.i4_codec_type = 0; +#ifndef DISABLE_SEI ps_params->s_out_strm_prms.i4_sei_buffer_period_flags = 0; ps_params->s_out_strm_prms.i4_sei_enable_flag = 0; ps_params->s_out_strm_prms.i4_sei_payload_enable_flag = 0; @@ -351,6 +365,7 @@ IHEVCE_PLUGIN_STATUS_T ihevce_set_def_params(ihevce_static_cfg_params_t *ps_para ps_params->s_out_strm_prms.i4_sei_recovery_point_flags = 0; ps_params->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags = 0; ps_params->s_out_strm_prms.i4_decoded_pic_hash_sei_flag = 0; +#endif ps_params->s_out_strm_prms.i4_sps_at_cdr_enable = 1; ps_params->s_out_strm_prms.i4_vui_enable = 0; /*Set the interoperability flag to 0*/ @@ -577,7 +592,7 @@ IV_API_CALL_STATUS_T * \if Function name : ihevce_plugin_init \endif * * \brief -* Initialises the enocder context and threads +* Initialises the encoder context and threads * * \param[in] Static params pointer * @@ -629,7 +644,6 @@ IHEVCE_PLUGIN_STATUS_T ihevce_init(ihevce_static_cfg_params_t *ps_params, void * ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin initialization\n"); return (IHEVCE_EFAIL); } - memset(ps_ctxt, 0, sizeof(plugin_ctxt_t)); /* initialise memory call backs */ ps_ctxt->ihevce_mem_alloc = memory_alloc; @@ -754,7 +768,6 @@ IHEVCE_PLUGIN_STATUS_T ihevce_init(ihevce_static_cfg_params_t *ps_params, void * "IHEVCE ERROR: Error in Plugin HLE memory initialization\n"); return (IHEVCE_EFAIL); } - memset(ps_interface_ctxt, 0, sizeof(ihevce_hle_ctxt_t)); ps_interface_ctxt->i4_size = sizeof(ihevce_hle_ctxt_t); ps_ctxt->pv_hle_interface_ctxt = 
ps_interface_ctxt; @@ -1490,7 +1503,7 @@ static IHEVCE_PLUGIN_STATUS_T * \if Function name : ihevce_close \endif * * \brief -* De-Initialises the enocder context and threads +* De-Initialises the encoder context and threads * * \param[in] Static params pointer * diff --git a/encoder/ihevce_rc_interface.c b/encoder/ihevce_rc_interface.c index d17dffc..a0a7101 100644 --- a/encoder/ihevce_rc_interface.c +++ b/encoder/ihevce_rc_interface.c @@ -1785,7 +1785,7 @@ WORD32 ihevce_get_L0_est_satd_based_scd_qp( WORD32 ihevce_rc_pre_enc_qp_query( void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out, WORD32 i4_update_delay) { - WORD32 scene_type, i4_is_scd = 0, i4_frame_qp, slice_type; + WORD32 scene_type, i4_is_scd = 0, i4_frame_qp, slice_type = ISLICE; rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt; rc_type_e e_rc_type = ps_rc_ctxt->e_rate_control_type; IV_PICTURE_CODING_TYPE_T pic_type = (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type; @@ -1855,6 +1855,11 @@ WORD32 ihevce_rc_pre_enc_qp_query( slice_type = BSLICE; break; } + default: + { + DBG_PRINTF("Invalid picture type %d\n", pic_type); + break; + } } i4_frame_qp = ihevce_get_cur_frame_qp( @@ -2086,7 +2091,7 @@ WORD32 ihevce_rc_get_pic_quant( WORD32 i4_frame_qp, i4_frame_qp_q6, i4_hevc_frame_qp = -1, i4_deltaQP = 0; WORD32 i4_max_frame_bits = (1 << 30); rc_type_e e_rc_type = ps_rc_ctxt->e_rate_control_type; - WORD32 slice_type, index, i4_num_frames_in_cur_gop, i4_cur_est_texture_bits; + WORD32 slice_type = ISLICE, index, i4_num_frames_in_cur_gop, i4_cur_est_texture_bits; WORD32 temporal_layer_id = ps_rc_lap_out->i4_rc_temporal_lyr_id; IV_PICTURE_CODING_TYPE_T pic_type = (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type; picture_type_e rc_pic_type = ihevce_rc_conv_pic_type( @@ -2140,6 +2145,11 @@ WORD32 ihevce_rc_get_pic_quant( slice_type = BSLICE; break; } + default: + { + DBG_PRINTF("Invalid picture type %d\n", pic_type); + break; + } } i4_frame_qp = ihevce_get_cur_frame_qp( diff --git 
a/encoder/libhevcenc.cmake b/encoder/libhevcenc.cmake index 2a0cc59..850b90e 100644 --- a/encoder/libhevcenc.cmake +++ b/encoder/libhevcenc.cmake @@ -95,7 +95,9 @@ list( include_directories(${HEVC_ROOT}/encoder) # arm/x86 sources -if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" +if("${SYSTEM_NAME}" STREQUAL "Darwin") + message("Assembly optimizations not supported for MacOS") +elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "aarch32") list( APPEND diff --git a/encoder/osal_semaphore.c b/encoder/osal_semaphore.c index 9875159..46a7e26 100644 --- a/encoder/osal_semaphore.c +++ b/encoder/osal_semaphore.c @@ -49,6 +49,10 @@ /* System include files */ #include +#ifdef DARWIN +#include +#include +#endif #include #include @@ -119,6 +123,25 @@ void *osal_sem_create(IN void *osal_handle, IN osal_sem_attr_t *attr) sem_handle->hdl = handle; /* Create a sempahore */ +#ifdef DARWIN + static int sem_counter = 0; + char sem_name[32]; + snprintf(sem_name, sizeof(sem_name), "/osal_sem_%d", sem_counter++); + + sem_t *sem = sem_open(sem_name, O_CREAT, 0644, attr->value); + if(sem == SEM_FAILED){ + handle->free(sem_handle->mmr_handle, sem_handle); + return 0; + } + sem_handle->sem_handle = sem; + sem_handle->sem_name = strdup(sem_name); + if(sem_handle->sem_name == NULL){ + sem_close(sem); + sem_unlink(sem_name); + handle->free(sem_handle->mmr_handle, sem_handle); + return 0; + } +#else if(-1 == sem_init( &(sem_handle->sem_handle), /* Semaphore handle */ 0, /* Shared only between threads */ @@ -127,6 +150,7 @@ void *osal_sem_create(IN void *osal_handle, IN osal_sem_attr_t *attr) handle->free(sem_handle->mmr_handle, sem_handle); return 0; } +#endif return sem_handle; } @@ -171,11 +195,20 @@ WORD32 osal_sem_destroy(IN void *sem_handle) return OSAL_ERROR; /* Destroy the semaphore */ +#ifdef DARWIN + if(0 == sem_close(handle->sem_handle) && 0 == sem_unlink(handle->sem_name)) + { + free(handle->sem_name); + 
handle->hdl->free(handle->mmr_handle, handle); + return OSAL_SUCCESS; + } +#else if(0 == sem_destroy(&(handle->sem_handle))) { handle->hdl->free(handle->mmr_handle, handle); return OSAL_SUCCESS; } +#endif return OSAL_ERROR; } @@ -219,7 +252,11 @@ WORD32 osal_sem_wait(IN void *sem_handle) sem_handle_t *handle = (sem_handle_t *)sem_handle; /* Wait on Semaphore object infinitly */ +#ifdef DARWIN + return sem_wait(handle->sem_handle); +#else return sem_wait(&(handle->sem_handle)); +#endif } } @@ -258,7 +295,11 @@ WORD32 osal_sem_post(IN void *sem_handle) sem_handle_t *handle = (sem_handle_t *)sem_handle; /* Semaphore Post */ +#ifdef DARWIN + return sem_post(handle->sem_handle); +#else return sem_post(&(handle->sem_handle)); +#endif } } @@ -297,8 +338,13 @@ WORD32 osal_sem_count(IN void *sem_handle, OUT WORD32 *count) { sem_handle_t *handle = (sem_handle_t *)sem_handle; +#ifdef DARWIN + if(-1 == sem_getvalue(handle->sem_handle, count)) + return OSAL_ERROR; +#else if(-1 == sem_getvalue(&(handle->sem_handle), count)) return OSAL_ERROR; +#endif return OSAL_SUCCESS; } diff --git a/encoder/osal_semaphore.h b/encoder/osal_semaphore.h index e42b9fa..3421ece 100644 --- a/encoder/osal_semaphore.h +++ b/encoder/osal_semaphore.h @@ -56,7 +56,12 @@ /* typedef integer. */ typedef struct { +#ifdef DARWIN + sem_t *sem_handle; /* Semaphore handle */ + char *sem_name; /* Semaphore name */ +#else sem_t sem_handle; /* Semaphore handle */ +#endif void *mmr_handle; /* Pointer to memory manager handle */ osal_t *hdl; /* Associated OSAL handle */ diff --git a/encoder/osal_thread.c b/encoder/osal_thread.c index 76b4495..9fe2a41 100644 --- a/encoder/osal_thread.c +++ b/encoder/osal_thread.c @@ -66,7 +66,9 @@ #include #include #include /*for CPU_SET, etc.. 
*/ -#include +#ifndef DARWIN +#include +#endif #include /* User include files */ @@ -296,14 +298,19 @@ WORD32 osal_thread_sleep(IN UWORD32 milli_seconds) timer.tv_sec = milli_seconds / 1000; milli_seconds -= (timer.tv_sec * 1000); timer.tv_nsec = milli_seconds * MEGA_CONST; - +#ifdef DARWIN + if(0 == nanosleep(&timer, NULL)) + { + return OSAL_SUCCESS; + } +#else /* Using Monotonic clock to sleep, also flag is set to 0 for relative */ /* time to current clock time */ if(0 == clock_nanosleep(CLOCK_MONOTONIC, 0, &timer, NULL)) { return OSAL_SUCCESS; } - +#endif return OSAL_ERROR; } } @@ -703,5 +710,11 @@ void osal_print_last_error(IN const STRWORD8 *string) WORD32 osal_get_current_tid(void) { +#ifdef DARWIN + uint64_t tid; + pthread_threadid_np(NULL, &tid); + return tid; +#else return syscall(__NR_gettid); +#endif } diff --git a/fuzzer/Android.bp b/fuzzer/Android.bp index 17bcca0..05ce8db 100644 --- a/fuzzer/Android.bp +++ b/fuzzer/Android.bp @@ -9,6 +9,10 @@ package { cc_fuzz { name: "hevc_dec_fuzzer", + //TODO: b/485868924 + defaults: [ + "no_bti", + ], host_supported: true, srcs: [ "hevc_dec_fuzzer.cpp", @@ -41,6 +45,10 @@ cc_fuzz { cc_fuzz { name: "hevc_enc_fuzzer", + //TODO: b/485868924 + defaults: [ + "no_bti", + ], host_supported: true, srcs: [ "hevc_enc_fuzzer.cpp", diff --git a/fuzzer/hevc_dec_fuzzer.cmake b/fuzzer/hevc_dec_fuzzer.cmake index 28e89fd..43c5c9d 100644 --- a/fuzzer/hevc_dec_fuzzer.cmake +++ b/fuzzer/hevc_dec_fuzzer.cmake @@ -1,2 +1,4 @@ -libhevc_add_fuzzer(hevc_dec_fuzzer libhevcdec SOURCES - ${HEVC_ROOT}/fuzzer/hevc_dec_fuzzer.cpp) +if(NOT "${SYSTEM_NAME}" STREQUAL "Darwin") + libhevc_add_fuzzer(hevc_dec_fuzzer libhevcdec SOURCES + ${HEVC_ROOT}/fuzzer/hevc_dec_fuzzer.cpp) +endif() \ No newline at end of file diff --git a/fuzzer/hevc_dec_fuzzer.cpp b/fuzzer/hevc_dec_fuzzer.cpp index 8dec5da..3c55b1e 100644 --- a/fuzzer/hevc_dec_fuzzer.cpp +++ b/fuzzer/hevc_dec_fuzzer.cpp @@ -32,11 +32,15 @@ #include "iv.h" #include "ivd.h" +#include 
"fuzzer/FuzzedDataProvider.h" + #define NELEMENTS(x) (sizeof(x) / sizeof(x[0])) #define ivd_api_function ihevcd_cxa_api_function const IV_COLOR_FORMAT_T supportedColorFormats[] = { IV_YUV_420P, IV_YUV_420SP_UV, IV_YUV_420SP_VU, - IV_YUV_422ILE, IV_RGB_565, IV_RGBA_8888}; + IV_GRAY}; + +const uint32_t enableYuvFormatBitFields[] = {0, 1, 2, 3}; /* Decoder ignores invalid arch, i.e. for arm build, if SSSE3 is requested, * decoder defaults to a supported configuration. So same set of supported @@ -45,17 +49,10 @@ const IVD_ARCH_T supportedArchitectures[] = { ARCH_ARM_NONEON, ARCH_ARM_A9Q, ARCH_ARM_NEONINTR, ARCH_ARMV8_GENERIC, ARCH_X86_GENERIC, ARCH_X86_SSSE3, ARCH_X86_SSE42}; -enum { - OFFSET_COLOR_FORMAT = 6, - OFFSET_NUM_CORES, - OFFSET_ARCH, - /* Should be the last entry */ - OFFSET_MAX, -}; - const static int kMaxNumDecodeCalls = 100; const static int kSupportedColorFormats = NELEMENTS(supportedColorFormats); const static int kSupportedArchitectures = NELEMENTS(supportedArchitectures); +const static int kEnableYuvFormatBitFields = NELEMENTS(enableYuvFormatBitFields); const static int kMaxCores = 4; void *iv_aligned_malloc(void *ctxt, WORD32 alignment, WORD32 size) { void *buf = NULL; @@ -73,33 +70,32 @@ void iv_aligned_free(void *ctxt, void *buf) { class Codec { public: - Codec(IV_COLOR_FORMAT_T colorFormat, size_t numCores); + Codec(FuzzedDataProvider &fdp); ~Codec(); - void createCodec(); + void createCodec(FuzzedDataProvider &fdp); void deleteCodec(); void resetCodec(); - void setCores(); + void setCores(FuzzedDataProvider &fdp); void allocFrame(); void freeFrame(); void decodeHeader(const uint8_t *data, size_t size); IV_API_CALL_STATUS_T decodeFrame(const uint8_t *data, size_t size, size_t *bytesConsumed); void setParams(IVD_VIDEO_DECODE_MODE_T mode); - void setArchitecture(IVD_ARCH_T arch); + void setArchitecture(FuzzedDataProvider &fdp); private: IV_COLOR_FORMAT_T mColorFormat; - size_t mNumCores; iv_obj_t *mCodec; ivd_out_bufdesc_t mOutBufHandle; uint32_t 
mWidth; uint32_t mHeight; }; -Codec::Codec(IV_COLOR_FORMAT_T colorFormat, size_t numCores) { - mColorFormat = colorFormat; - mNumCores = numCores; +Codec::Codec(FuzzedDataProvider &fdp) { + mColorFormat = + (IV_COLOR_FORMAT_T)fdp.PickValueInArray(supportedColorFormats); mCodec = nullptr; mWidth = 0; mHeight = 0; @@ -109,7 +105,7 @@ Codec::Codec(IV_COLOR_FORMAT_T colorFormat, size_t numCores) { Codec::~Codec() {} -void Codec::createCodec() { +void Codec::createCodec(FuzzedDataProvider &fdp) { IV_API_CALL_STATUS_T ret; ihevcd_cxa_create_ip_t create_ip{}; ihevcd_cxa_create_op_t create_op{}; @@ -118,10 +114,12 @@ void Codec::createCodec() { create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 0; create_ip.s_ivd_create_ip_t.e_output_format = mColorFormat; + create_ip.u4_keep_threads_active = 1; create_ip.s_ivd_create_ip_t.pf_aligned_alloc = iv_aligned_malloc; create_ip.s_ivd_create_ip_t.pf_aligned_free = iv_aligned_free; create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; create_ip.s_ivd_create_ip_t.u4_size = sizeof(ihevcd_cxa_create_ip_t); + create_ip.u4_enable_yuv_formats = fdp.PickValueInArray(enableYuvFormatBitFields); create_op.s_ivd_create_op_t.u4_size = sizeof(ihevcd_cxa_create_op_t); ret = ivd_api_function(NULL, (void *)&create_ip, (void *)&create_op); @@ -156,14 +154,13 @@ void Codec::resetCodec() { ivd_api_function(mCodec, (void *)&s_ctl_ip, (void *)&s_ctl_op); } -void Codec::setCores() { +void Codec::setCores(FuzzedDataProvider &fdp) { ihevcd_cxa_ctl_set_num_cores_ip_t s_ctl_ip{}; ihevcd_cxa_ctl_set_num_cores_op_t s_ctl_op{}; - s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IHEVCD_CXA_CMD_CTL_SET_NUM_CORES; - s_ctl_ip.u4_num_cores = mNumCores; + s_ctl_ip.u4_num_cores = (fdp.ConsumeIntegral() % kMaxCores) + 1; s_ctl_ip.u4_size = sizeof(ihevcd_cxa_ctl_set_num_cores_ip_t); s_ctl_op.u4_size = sizeof(ihevcd_cxa_ctl_set_num_cores_op_t); @@ -186,14 +183,13 @@ void 
Codec::setParams(IVD_VIDEO_DECODE_MODE_T mode) { ivd_api_function(mCodec, (void *)&s_ctl_ip, (void *)&s_ctl_op); } -void Codec::setArchitecture(IVD_ARCH_T arch) { +void Codec::setArchitecture(FuzzedDataProvider &fdp) { ihevcd_cxa_ctl_set_processor_ip_t s_ctl_ip{}; ihevcd_cxa_ctl_set_processor_op_t s_ctl_op{}; - s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IHEVCD_CXA_CMD_CTL_SET_PROCESSOR; - s_ctl_ip.u4_arch = arch; + s_ctl_ip.u4_arch = (IVD_ARCH_T)fdp.PickValueInArray(supportedArchitectures); s_ctl_ip.u4_soc = SOC_GENERIC; s_ctl_ip.u4_size = sizeof(ihevcd_cxa_ctl_set_processor_ip_t); s_ctl_op.u4_size = sizeof(ihevcd_cxa_ctl_set_processor_op_t); @@ -225,16 +221,8 @@ void Codec::allocFrame() { sizes[1] = mWidth * mHeight >> 1; num_bufs = 2; break; - case IV_YUV_422ILE: - sizes[0] = mWidth * mHeight * 2; - num_bufs = 1; - break; - case IV_RGB_565: - sizes[0] = mWidth * mHeight * 2; - num_bufs = 1; - break; - case IV_RGBA_8888: - sizes[0] = mWidth * mHeight * 4; + case IV_GRAY: + sizes[0] = mWidth * mHeight; num_bufs = 1; break; case IV_YUV_420P: @@ -256,7 +244,9 @@ void Codec::allocFrame() { void Codec::decodeHeader(const uint8_t *data, size_t size) { setParams(IVD_DECODE_HEADER); - while (size > 0) { + size_t numDecodeCalls = 0; + + while (size > 0 && numDecodeCalls < kMaxNumDecodeCalls) { IV_API_CALL_STATUS_T ret; ivd_video_decode_ip_t dec_ip{}; ivd_video_decode_op_t dec_op{}; @@ -283,6 +273,7 @@ void Codec::decodeHeader(const uint8_t *data, size_t size) { data += bytes_consumed; size -= bytes_consumed; + numDecodeCalls++; mWidth = std::min(dec_op.u4_pic_wd, (UWORD32)10240); mHeight = std::min(dec_op.u4_pic_ht, (UWORD32)10240); @@ -343,20 +334,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size < 1) { return 0; } - size_t colorFormatOfst = std::min((size_t)OFFSET_COLOR_FORMAT, size - 1); - size_t numCoresOfst = std::min((size_t)OFFSET_NUM_CORES, size - 1); - size_t architectureOfst = 
std::min((size_t)OFFSET_ARCH, size - 1); - size_t architectureIdx = data[architectureOfst] % kSupportedArchitectures; - IVD_ARCH_T arch = (IVD_ARCH_T)supportedArchitectures[architectureIdx]; - size_t colorFormatIdx = data[colorFormatOfst] % kSupportedColorFormats; - IV_COLOR_FORMAT_T colorFormat = - (IV_COLOR_FORMAT_T)(supportedColorFormats[colorFormatIdx]); - uint32_t numCores = (data[numCoresOfst] % kMaxCores) + 1; + FuzzedDataProvider fdp(data, size); + size_t numDecodeCalls = 0; - Codec *codec = new Codec(colorFormat, numCores); - codec->createCodec(); - codec->setArchitecture(arch); - codec->setCores(); + Codec *codec = new Codec(fdp); + codec->createCodec(fdp); + codec->setArchitecture(fdp); + codec->setCores(fdp); codec->decodeHeader(data, size); codec->setParams(IVD_DECODE_FRAME); codec->allocFrame(); diff --git a/fuzzer/hevc_enc_fuzzer.cmake b/fuzzer/hevc_enc_fuzzer.cmake index 908b2f1..29371bd 100644 --- a/fuzzer/hevc_enc_fuzzer.cmake +++ b/fuzzer/hevc_enc_fuzzer.cmake @@ -1,2 +1,4 @@ -libhevc_add_fuzzer(hevc_enc_fuzzer libhevcenc SOURCES - ${HEVC_ROOT}/fuzzer/hevc_enc_fuzzer.cpp) +if(NOT "${SYSTEM_NAME}" STREQUAL "Darwin") + libhevc_add_fuzzer(hevc_enc_fuzzer libhevcenc SOURCES + ${HEVC_ROOT}/fuzzer/hevc_enc_fuzzer.cpp) +endif() \ No newline at end of file diff --git a/fuzzer/hevc_enc_fuzzer.cpp b/fuzzer/hevc_enc_fuzzer.cpp index 3dc2f4d..0c8e69f 100644 --- a/fuzzer/hevc_enc_fuzzer.cpp +++ b/fuzzer/hevc_enc_fuzzer.cpp @@ -18,6 +18,7 @@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore */ #include +#include #include #include diff --git a/fuzzer/ossfuzz.sh b/fuzzer/ossfuzz.sh index ee1f1c7..2145c75 100755 --- a/fuzzer/ossfuzz.sh +++ b/fuzzer/ossfuzz.sh @@ -18,10 +18,10 @@ test "${SRC}" != "" || exit 1 test "${WORK}" != "" || exit 1 test "${OUT}" != "" || exit 1 -#Opt out of null and shift sanitizers in undefined sanitizer +#Opt out of shift sanitizer in undefined sanitizer if [[ $SANITIZER = *undefined* ]]; then - CFLAGS="$CFLAGS -fno-sanitize=null,shift" - CXXFLAGS="$CXXFLAGS -fno-sanitize=null,shift" + CFLAGS="$CFLAGS -fno-sanitize=shift" + CXXFLAGS="$CXXFLAGS -fno-sanitize=shift" fi # Build libhevc diff --git a/test/decoder/main.c b/test/decoder/main.c index 8028de3..ce93527 100644 --- a/test/decoder/main.c +++ b/test/decoder/main.c @@ -50,6 +50,7 @@ #include "iv.h" #include "ivd.h" +#include "ihevc_defs.h" #include "ihevcd_cxa.h" #include "ithread.h" @@ -227,6 +228,12 @@ typedef struct WORD32 quit; WORD32 paused; + /* Enable YUV formats */ + UWORD32 u4_enable_yuv_formats; + + /* Active threads present*/ + UWORD32 i4_active_threads; + void *pv_disp_ctx; void *display_thread_handle; @@ -277,6 +284,9 @@ typedef enum SOC, PICLEN, PICLEN_FILE, + + ENABLE_YUV_FORMAT, + KEEP_THREADS_ACTIVE, }ARGUMENT_T; typedef struct @@ -317,7 +327,7 @@ static const argument_t argument_mapping[] = { "--", "--save_chksum", SAVE_CHKSUM, "Save Check sum file\n" }, { "--", "--chroma_format", CHROMA_FORMAT, - "Output Chroma format Supported values YUV_420P, YUV_422ILE, RGB_565, YUV_420SP_UV, YUV_420SP_VU\n" }, + "Output Chroma format Supported values YUV_420P, YUV_420SP_UV, YUV_420SP_VU, GRAY, YUV_444P\n" }, { "-n", "--num_frames", NUM_FRAMES, "Number of frames to be decoded\n" }, { "--", "--num_cores", NUM_CORES, @@ -342,6 +352,10 @@ static const argument_t argument_mapping[] = "Set Architecture. Supported values ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR, X86_GENERIC, X86_SSSE3, X86_SSE4 \n" }, { "--", "--soc", SOC, "Set SOC. 
Supported values GENERIC, HISI_37X \n" }, + { "--", "--enable_yuv_format", ENABLE_YUV_FORMAT, + "Enable specific YUV formats" }, + {"--", "--keep_threads_active", KEEP_THREADS_ACTIVE, + "Keep threads active"}, }; #define PEAK_WINDOW_SIZE 8 @@ -854,6 +868,7 @@ IV_API_CALL_STATUS_T get_version(void *codec_obj) /*****************************************************************************/ void codec_exit(CHAR *pc_err_message) { + printf("Summary\n"); printf("%s\n", pc_err_message); exit(-1); } @@ -957,7 +972,10 @@ void dump_output(vid_dec_ctx_t *ps_app_ctx, if(NULL == s_dump_disp_frm_buf.pv_y_buf) return; - if(ps_app_ctx->e_output_chroma_format == IV_YUV_420P) + if(ps_app_ctx->e_output_chroma_format == IV_YUV_420P + || ps_app_ctx->e_output_chroma_format == IV_YUV_444P + || ps_app_ctx->e_output_chroma_format == IV_YUV_422P + || ps_app_ctx->e_output_chroma_format == IV_GRAY) { #if DUMP_SINGLE_BUF { @@ -979,17 +997,20 @@ void dump_output(vid_dec_ctx_t *ps_app_ctx, buf += s_dump_disp_frm_buf.u4_y_strd; } - buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf; - for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++) + if(ps_app_ctx->e_output_chroma_format != IV_GRAY) { - fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file); - buf += s_dump_disp_frm_buf.u4_u_strd; - } - buf = (UWORD8 *)s_dump_disp_frm_buf.pv_v_buf; - for(i = 0; i < s_dump_disp_frm_buf.u4_v_ht; i++) - { - fwrite(buf, 1, s_dump_disp_frm_buf.u4_v_wd, ps_op_file); - buf += s_dump_disp_frm_buf.u4_v_strd; + buf = (UWORD8*)s_dump_disp_frm_buf.pv_u_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_u_strd; + } + buf = (UWORD8*)s_dump_disp_frm_buf.pv_v_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_v_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_v_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_v_strd; + } } } @@ -997,27 +1018,30 @@ void dump_output(vid_dec_ctx_t *ps_app_ctx, if(0 != chksum_save) { UWORD8 
au1_y_chksum[16]; - UWORD8 au1_u_chksum[16]; - UWORD8 au1_v_chksum[16]; calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_y_buf, s_dump_disp_frm_buf.u4_y_strd, s_dump_disp_frm_buf.u4_y_wd, s_dump_disp_frm_buf.u4_y_ht, au1_y_chksum); - calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_u_buf, - s_dump_disp_frm_buf.u4_u_strd, - s_dump_disp_frm_buf.u4_u_wd, - s_dump_disp_frm_buf.u4_u_ht, - au1_u_chksum); - calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_v_buf, - s_dump_disp_frm_buf.u4_v_strd, - s_dump_disp_frm_buf.u4_v_wd, - s_dump_disp_frm_buf.u4_v_ht, - au1_v_chksum); - fwrite(au1_y_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); - fwrite(au1_u_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); - fwrite(au1_v_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + + if(ps_app_ctx->e_output_chroma_format != IV_GRAY) + { + UWORD8 au1_u_chksum[16]; + UWORD8 au1_v_chksum[16]; + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_u_buf, + s_dump_disp_frm_buf.u4_u_strd, + s_dump_disp_frm_buf.u4_u_wd, + s_dump_disp_frm_buf.u4_u_ht, + au1_u_chksum); + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_v_buf, + s_dump_disp_frm_buf.u4_v_strd, + s_dump_disp_frm_buf.u4_v_wd, + s_dump_disp_frm_buf.u4_v_ht, + au1_v_chksum); + fwrite(au1_u_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + fwrite(au1_v_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + } } #endif } @@ -1052,28 +1076,6 @@ void dump_output(vid_dec_ctx_t *ps_app_ctx, } #endif } - else if(ps_app_ctx->e_output_chroma_format == IV_RGBA_8888) - { - UWORD8 *buf; - - buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; - for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) - { - fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd * 4, ps_op_file); - buf += s_dump_disp_frm_buf.u4_y_strd * 4; - } - } - else - { - UWORD8 *buf; - - buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; - for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) - { - fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_strd * 2, ps_op_file); - buf += s_dump_disp_frm_buf.u4_y_strd * 2; - } - } fflush(ps_op_file); 
fflush(ps_op_chksum_file); @@ -1185,7 +1187,6 @@ ARGUMENT_T get_argument(CHAR *name) void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value) { ARGUMENT_T arg; - arg = get_argument(argument); switch(arg) { @@ -1230,16 +1231,16 @@ void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value) case CHROMA_FORMAT: if((strcmp(value, "YUV_420P")) == 0) ps_app_ctx->e_output_chroma_format = IV_YUV_420P; - else if((strcmp(value, "YUV_422ILE")) == 0) - ps_app_ctx->e_output_chroma_format = IV_YUV_422ILE; - else if((strcmp(value, "RGB_565")) == 0) - ps_app_ctx->e_output_chroma_format = IV_RGB_565; - else if((strcmp(value, "RGBA_8888")) == 0) - ps_app_ctx->e_output_chroma_format = IV_RGBA_8888; + else if((strcmp(value, "YUV_444P")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_444P; else if((strcmp(value, "YUV_420SP_UV")) == 0) ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_UV; else if((strcmp(value, "YUV_420SP_VU")) == 0) ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_VU; + else if((strcmp(value, "GRAY")) == 0) + ps_app_ctx->e_output_chroma_format = IV_GRAY; + else if((strcmp(value, "YUV_422P")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_422P; else { printf("\nInvalid colour format setting it to IV_YUV_420P\n"); @@ -1335,6 +1336,14 @@ void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value) sscanf(value, "%s", ps_app_ctx->ac_piclen_fname); break; + case ENABLE_YUV_FORMAT: + sscanf(value, "%d", &ps_app_ctx->u4_enable_yuv_formats); + break; + + case KEEP_THREADS_ACTIVE: + sscanf(value, "%d", &ps_app_ctx->i4_active_threads); + break; + case INVALID: default: printf("Ignoring argument : %s\n", argument); @@ -1383,7 +1392,7 @@ void read_cfg_file(vid_dec_ctx_t *ps_app_ctx, FILE *fp_cfg_file) argument[0] = '\0'; /* Reading Input File Name */ sscanf(line, "%s %s %s", argument, value, description); - if(argument[0] == '\0') + if(argument[0] == '\0' || argument[0] == '#') continue; parse_argument(ps_app_ctx, 
argument, value); @@ -1879,6 +1888,7 @@ int main(WORD32 argc, CHAR *argv[]) s_app_ctx.full_screen = 0; s_app_ctx.u4_piclen_flag = 0; s_app_ctx.u4_frame_info_enable = 0; + s_app_ctx.i4_active_threads = 1; s_app_ctx.fps = DEFAULT_FPS; file_pos = 0; total_bytes_comsumed = 0; @@ -2157,6 +2167,8 @@ int main(WORD32 argc, CHAR *argv[]) s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ihevcd_cxa_create_ip_t); s_create_op.s_ivd_create_op_t.u4_size = sizeof(ihevcd_cxa_create_op_t); s_create_ip.u4_enable_frame_info = s_app_ctx.u4_frame_info_enable; + s_create_ip.u4_enable_yuv_formats = s_app_ctx.u4_enable_yuv_formats; + s_create_ip.u4_keep_threads_active = s_app_ctx.i4_active_threads; @@ -2405,23 +2417,23 @@ int main(WORD32 argc, CHAR *argv[]) s_ctl_op.u4_min_out_buf_size[2] = 0; break; } - case IV_YUV_422ILE: + case IV_YUV_422P: { - s_ctl_op.u4_min_out_buf_size[0] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT * 2; - s_ctl_op.u4_min_out_buf_size[1] = 0; - s_ctl_op.u4_min_out_buf_size[2] = 0; + s_ctl_op.u4_min_out_buf_size[0] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT; + s_ctl_op.u4_min_out_buf_size[1] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT >> 1; + s_ctl_op.u4_min_out_buf_size[2] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT >> 1; break; } - case IV_RGBA_8888: + case IV_YUV_444P: { - s_ctl_op.u4_min_out_buf_size[0] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT * 4; - s_ctl_op.u4_min_out_buf_size[1] = 0; - s_ctl_op.u4_min_out_buf_size[2] = 0; + s_ctl_op.u4_min_out_buf_size[0] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT; + s_ctl_op.u4_min_out_buf_size[1] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT; + s_ctl_op.u4_min_out_buf_size[2] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT; break; } - case IV_RGB_565: + case IV_GRAY: { - s_ctl_op.u4_min_out_buf_size[0] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT * 2; + s_ctl_op.u4_min_out_buf_size[0] = ADAPTIVE_MAX_WD * ADAPTIVE_MAX_HT; s_ctl_op.u4_min_out_buf_size[1] = 0; s_ctl_op.u4_min_out_buf_size[2] = 0; break; @@ -3024,7 +3036,6 @@ int main(WORD32 argc, CHAR *argv[]) } - if((1 == s_app_ctx.display) && (1 == 
ps_video_decode_op->u4_output_present)) { diff --git a/test/decoder/test.cfg b/test/decoder/test.cfg index d0c6e29..44cb400 100644 --- a/test/decoder/test.cfg +++ b/test/decoder/test.cfg @@ -1,14 +1,22 @@ ---input /data/local/tmp/hevcdec/crew_720p_2mbps.265 ---save_output 0 +### Decoder configuration file + +### Source and Destination Files +--input str.bin +--output out.yuv + + ### Destination Control Flags +--save_output 1 --num_frames -1 ---output /data/local/tmp/hevcdec/out.yuv ---chroma_format YUV_420P ---share_display_buf 0 ---max_wd 1920 ---max_ht 1080 ---max_level 41 ---num_cores 2 --loopback 0 +--chroma_format YUV_420P # options: {YUV_420P, YUV_420SP_UV, YUV_420SP_VU, GRAY, YUV_444P, YUV_422P} + +### Profile Settings +--enable_yuv_format 13 # options: {bit 0: 400, bit 1: X, bit 2: 422, bit 3: 444 } + +### Display settings +--share_display_buf 0 --display 0 ---arch ARM_A9Q ---soc GENERIC + +### Performance Settings +--num_cores 1 +--arch X86_GENERIC # options: {ARM_NONEON, ARM_A9Q, ARMV8_GENERIC, X86_GENERIC, X86_SSSE3, X86_SSE42} diff --git a/tests/common/common.cmake b/tests/common/common.cmake new file mode 100644 index 0000000..ebeb7ca --- /dev/null +++ b/tests/common/common.cmake @@ -0,0 +1,22 @@ +enable_testing() +libhevc_add_gtest_executable( + ihevc_luma_inter_pred_test + SOURCES ${HEVC_ROOT}/tests/common/ihevc_luma_inter_pred_test.cc +) + +libhevc_add_gtest_executable( + ihevc_luma_intra_pred_test + SOURCES ${HEVC_ROOT}/tests/common/ihevc_luma_intra_pred_test.cc +) + +libhevc_add_gtest_executable( + ihevc_itrans_res_test + SOURCES ${HEVC_ROOT}/tests/common/ihevc_itrans_res_test.cc +) + +libhevc_add_gtest_executable( + ihevc_itrans_recon_test + SOURCES ${HEVC_ROOT}/tests/common/ihevc_itrans_recon_test.cc +) + +include(GoogleTest) diff --git a/tests/common/func_selector.cc b/tests/common/func_selector.cc new file mode 100644 index 0000000..c35d32e --- /dev/null +++ b/tests/common/func_selector.cc @@ -0,0 +1,115 @@ 
+/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_inter_pred.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +// clang-format on + +const func_selector_t ref = []() { + func_selector_t ret = {}; +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) + ihevcd_init_function_ptr_generic(&ret); +#elif defined(__aarch64__) || defined(__arm__) + ihevcd_init_function_ptr_noneon(&ret); +#endif + return ret; +}(); + +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) +const func_selector_t test_ssse3 = []() { + func_selector_t ret = {}; + ihevcd_init_function_ptr_ssse3(&ret); + return ret; +}(); + +const func_selector_t test_sse42 = []() { + func_selector_t ret = {}; + ihevcd_init_function_ptr_sse42(&ret); + return ret; +}(); + +#ifndef DISABLE_AVX2 +const func_selector_t test_avx2 = []() { + func_selector_t ret = {}; + ihevcd_init_function_ptr_avx2(&ret); + return ret; +}(); +#endif +#elif defined(__aarch64__) +const func_selector_t test_arm64 = []() { + func_selector_t ret = {}; +#ifdef 
DARWIN + ihevcd_init_function_ptr_noneon(&ret); +#else + ihevcd_init_function_ptr_av8(&ret); +#endif + return ret; +}(); +#elif defined(__arm__) +const func_selector_t test_arm32 = []() { + func_selector_t ret = {}; +#ifdef DARWIN + ihevcd_init_function_ptr_noneon(&ret); +#else + ihevcd_init_function_ptr_a9q(&ret); +#endif + return ret; +}(); +#endif + +const func_selector_t *get_ref_func_ptr() { return &ref; } + +const func_selector_t *get_tst_func_ptr(IVD_ARCH_T arch) { + switch (arch) { +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) + case ARCH_X86_SSSE3: + return &test_ssse3; + case ARCH_X86_SSE42: + return &test_sse42; +#ifndef DISABLE_AVX2 + case ARCH_X86_AVX2: + return &test_avx2; +#endif +#elif defined(__aarch64__) + case ARCH_ARMV8_GENERIC: + return &test_arm64; +#elif defined(__arm__) + case ARCH_ARM_A9Q: + return &test_arm32; +#endif + default: + return nullptr; + } +} diff --git a/tests/common/func_selector.h b/tests/common/func_selector.h new file mode 100644 index 0000000..5738aef --- /dev/null +++ b/tests/common/func_selector.h @@ -0,0 +1,41 @@ +/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ +#ifndef __FUNC_SELECTOR_H__ +#define __FUNC_SELECTOR_H__ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_inter_pred.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +// clang-format on + +const func_selector_t *get_ref_func_ptr(); +const func_selector_t *get_tst_func_ptr(IVD_ARCH_T arch); + +#endif /* __FUNC_SELECTOR_H__ */ \ No newline at end of file diff --git a/tests/common/ihevc_itrans_recon_test.cc b/tests/common/ihevc_itrans_recon_test.cc new file mode 100644 index 0000000..77a4ede --- /dev/null +++ b/tests/common/ihevc_itrans_recon_test.cc @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include + +#include "func_selector.h" +#include "ihevc_defs.h" +#include "ihevc_itrans_recon.h" +#include "ihevc_macros.h" +#include "ihevc_structs.h" +#include "ihevc_typedefs.h" +#include "tests_common.h" + +namespace { + +// Test parameters: trans_size, ttype (0: normal, 1: ttype1), arch, +// non_zero_rows, non_zero_cols (number of non-zero rows/columns) +using ITransReconTestParam = std::tuple; + +class ITransReconTest : public ::testing::TestWithParam { +protected: + void SetUp() override { + std::tie(trans_size, ttype, arch, num_non_zero_rows, num_non_zero_cols) = + GetParam(); + + src_strd = trans_size; + pred_strd = trans_size; + dst_strd = trans_size; + + pi2_src.resize(trans_size * trans_size); + // pi2_tmp needs to be large enough to hold intermediate data of width * + // height 16bits. 
+ pi2_tmp.resize(trans_size * trans_size); + pu1_pred.resize(trans_size * trans_size); + pu1_dst_ref.resize(trans_size * trans_size); + pu1_dst_tst.resize(trans_size * trans_size); + + ref_func_selector = get_ref_func_ptr(); + tst_func_selector = get_tst_func_ptr(arch); + } + + template void RunTest(FuncPtr func_ptr) { + std::mt19937 rng(0); + std::uniform_int_distribution coeff_dist(-32768, 32767); + std::uniform_int_distribution pixel_dist(0, 255); + + // Populate pi2_src so that the requested number of rows and columns + // are potentially non-zero. Rows [0, non_zero_rows) and columns + // [0, non_zero_cols) form the non-zero region; everything else is zero. + std::fill(pi2_src.begin(), pi2_src.end(), 0); + for (int i = 0; i < trans_size; i++) { + for (int j = 0; j < trans_size; j++) { + if (i < num_non_zero_rows && j < num_non_zero_cols) { + pi2_src[i * src_strd + j] = coeff_dist(rng); + } + } + } + + for (auto &v : pu1_pred) + v = pixel_dist(rng); + + WORD32 non_zero_rows_mask = 0; + for (int i = 0; i < num_non_zero_rows && i < trans_size; i++) { + non_zero_rows_mask |= (1u << i); + } + + WORD32 non_zero_cols_mask = 0; + for (int j = 0; j < num_non_zero_cols && j < trans_size; j++) { + non_zero_cols_mask |= (1u << j); + } + + WORD32 mask = (trans_size == 32) + ? 
0xFFFFFFFFu + : ((static_cast(1u) << trans_size) - 1u); + WORD32 zero_cols = (~non_zero_cols_mask) & mask; + WORD32 zero_rows = (~non_zero_rows_mask) & mask; + + (ref_func_selector->*func_ptr)( + pi2_src.data(), pi2_tmp.data(), pu1_pred.data(), pu1_dst_ref.data(), + src_strd, pred_strd, dst_strd, zero_cols, zero_rows); + (tst_func_selector->*func_ptr)( + pi2_src.data(), pi2_tmp.data(), pu1_pred.data(), pu1_dst_tst.data(), + src_strd, pred_strd, dst_strd, zero_cols, zero_rows); + ASSERT_NO_FATAL_FAILURE(compare_output( + pu1_dst_ref, pu1_dst_tst, trans_size, trans_size, dst_strd)); + } + + int trans_size; + int ttype; + IVD_ARCH_T arch; + const func_selector_t *ref_func_selector; + const func_selector_t *tst_func_selector; + + WORD32 src_strd; + WORD32 pred_strd; + WORD32 dst_strd; + WORD32 num_non_zero_rows; + WORD32 num_non_zero_cols; + std::vector pi2_src; + std::vector pi2_tmp; + std::vector pu1_pred; + std::vector pu1_dst_ref; + std::vector pu1_dst_tst; +}; + +TEST_P(ITransReconTest, Run) { + if (trans_size == 4) { + if (ttype == 1) { + RunTest(&func_selector_t::ihevc_itrans_recon_4x4_ttype1_fptr); + } else { + RunTest(&func_selector_t::ihevc_itrans_recon_4x4_fptr); + } + } else if (trans_size == 8) { + RunTest(&func_selector_t::ihevc_itrans_recon_8x8_fptr); + } else if (trans_size == 16) { + RunTest(&func_selector_t::ihevc_itrans_recon_16x16_fptr); + } else if (trans_size == 32) { +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) + GTEST_SKIP() << "SSE4.2 and SSSE3 are not matching C implementation for " + "ihevc_itrans_recon_32x32_fptr"; +#endif + RunTest(&func_selector_t::ihevc_itrans_recon_32x32_fptr); + } +} + +std::string PrintITransReconTestParam( + const testing::TestParamInfo &info) { + WORD32 trans_size, ttype, non_zero_rows, non_zero_cols; + IVD_ARCH_T arch; + std::tie(trans_size, ttype, arch, non_zero_rows, non_zero_cols) = info.param; + std::stringstream ss; + ss << "size_" << trans_size << "_ttype_" << ttype << 
"_nzr_" << non_zero_rows + << "_nzc_" << non_zero_cols << "_" << get_arch_str(arch); + return ss.str(); +} + +std::vector GenerateITransReconTestParams() { + std::vector params; + const WORD32 nz_options[] = {1, 2, 4, 8, 16, 32}; + + auto add_params_for_size = [&](int size, const int *ttypes, int num_ttypes) { + for (int t = 0; t < num_ttypes; t++) { + int ttype = ttypes[t]; + for (auto arch : ga_tst_arch) { + for (WORD32 nnzr : nz_options) { + if (nnzr > size) + continue; + for (WORD32 nnzc : nz_options) { + if (nnzc > size) + continue; + params.emplace_back(size, ttype, arch, nnzr, nnzc); + } + } + } + } + }; + + const int ttypes4[] = {0, 1}; + const int ttypesOther[] = {0}; + + add_params_for_size(4, ttypes4, 2); + add_params_for_size(8, ttypesOther, 1); + add_params_for_size(16, ttypesOther, 1); + add_params_for_size(32, ttypesOther, 1); + + return params; +} + +INSTANTIATE_TEST_SUITE_P(ITransRecon, ITransReconTest, + ::testing::ValuesIn(GenerateITransReconTestParams()), + PrintITransReconTestParam); + +} // namespace diff --git a/tests/common/ihevc_itrans_res_test.cc b/tests/common/ihevc_itrans_res_test.cc new file mode 100644 index 0000000..1819bce --- /dev/null +++ b/tests/common/ihevc_itrans_res_test.cc @@ -0,0 +1,138 @@ +/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_itrans_res.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +#include "func_selector.h" +#include "tests_common.h" +// clang-format on + +// Test parameters: trans_size, ttype (0: normal, 1: ttype1), arch +using ITransResTestParam = std::tuple; + +class ITransResTest : public ::testing::TestWithParam { +protected: + void SetUp() override { + std::tie(trans_size, ttype, arch) = GetParam(); + src_strd = trans_size; + dst_strd = trans_size; + + // Input buffer (coefficients) + src_buf.resize(trans_size * trans_size); + + // Temporary buffer (intermediate 16-bit data) + // Size needed: width * height * 16 bits = width * height * 2 bytes + // pi2_tmp is WORD16*, so we need width * height elements + tmp_buf.resize(trans_size * trans_size); + + // Output buffers + dst_buf_ref.resize(trans_size * trans_size); + dst_buf_tst.resize(trans_size * trans_size); + + std::mt19937 rng(12345); + std::uniform_int_distribution dist(-32768, 32767); + + for (auto &v : src_buf) { + v = static_cast(dist(rng)); + } + + // Fill dst buffers with pattern + std::fill(dst_buf_ref.begin(), dst_buf_ref.end(), 0xCDCD); + std::fill(dst_buf_tst.begin(), dst_buf_tst.end(), 0xCDCD); + + tst = get_tst_func_ptr(arch); + ref = get_ref_func_ptr(); + } + + template void RunTest(FuncPtr func_ptr) { + (ref->*func_ptr)(src_buf.data(), tmp_buf.data(), dst_buf_ref.data(), + src_strd, dst_strd, 0, 0); + (tst->*func_ptr)(src_buf.data(), tmp_buf.data(), dst_buf_tst.data(), + src_strd, dst_strd, 0, 0); + + ASSERT_NO_FATAL_FAILURE(compare_output( + dst_buf_ref, dst_buf_tst, trans_size, trans_size, dst_strd)); + } + + int trans_size; + int ttype; + IVD_ARCH_T arch; + int src_strd, dst_strd; + std::vector src_buf; + std::vector tmp_buf; + 
std::vector dst_buf_ref; + std::vector dst_buf_tst; + const func_selector_t *tst; + const func_selector_t *ref; +}; + +TEST_P(ITransResTest, Run) { + if (trans_size == 4) { + if (ttype == 1) { + RunTest(&func_selector_t::ihevc_itrans_res_4x4_ttype1_fptr); + } else { + RunTest(&func_selector_t::ihevc_itrans_res_4x4_fptr); + } + } else if (trans_size == 8) { + RunTest(&func_selector_t::ihevc_itrans_res_8x8_fptr); + } else if (trans_size == 16) { + RunTest(&func_selector_t::ihevc_itrans_res_16x16_fptr); + } else if (trans_size == 32) { + RunTest(&func_selector_t::ihevc_itrans_res_32x32_fptr); + } +} + +std::string PrintITransResTestParam( + const testing::TestParamInfo &info) { + int trans_size, ttype; + IVD_ARCH_T arch; + std::tie(trans_size, ttype, arch) = info.param; + std::stringstream ss; + ss << "size_" << trans_size << "_ttype_" << ttype << "_" + << get_arch_str(arch); + return ss.str(); +} + +// Instantiate tests +// Size 4: ttype 0 and 1 +INSTANTIATE_TEST_SUITE_P(ITransRes4x4, ITransResTest, + ::testing::Combine(::testing::Values(4), + ::testing::Values(0, 1), + ::testing::ValuesIn(ga_tst_arch)), + PrintITransResTestParam); + +// Size 8, 16, 32: ttype 0 +INSTANTIATE_TEST_SUITE_P(ITransResOther, ITransResTest, + ::testing::Combine(::testing::Values(8, 16, 32), + ::testing::Values(0), + ::testing::ValuesIn(ga_tst_arch)), + PrintITransResTestParam); diff --git a/tests/common/ihevc_luma_inter_pred_test.cc b/tests/common/ihevc_luma_inter_pred_test.cc new file mode 100644 index 0000000..5a41802 --- /dev/null +++ b/tests/common/ihevc_luma_inter_pred_test.cc @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_inter_pred.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +#include "func_selector.h" +#include "tests_common.h" +// clang-format on + +// Test parameters: width, height, src_stride_mul, dst_stride_mul, coeff_idx, +// arch +using LumaInterPredTestParam = + std::tuple, int, int, int, IVD_ARCH_T>; +template +class LumaInterPredTest + : public ::testing::TestWithParam { +protected: + void SetUp() override { + + std::pair block_size; + std::tie(block_size, src_strd_mul, dst_strd_mul, coeff_idx, arch) = + GetParam(); + std::tie(wd, ht) = block_size; + src_strd = wd * src_strd_mul; + dst_strd = wd * dst_strd_mul; + + dst_buf_ref.resize(dst_strd * ht); + dst_buf_tst.resize(dst_strd * ht); + + // Set pv_src to a valid position within src_buf to allow negative indexing + pv_src = (srcType *)g_src8_buf.data() + kTapSize / 2 * src_strd; + pv_dst_ref = dst_buf_ref.data(); + pv_dst_tst = dst_buf_tst.data(); + + pi1_coeffs = gai1_ihevc_luma_filter[coeff_idx]; + tst = get_tst_func_ptr(arch); + ref = get_ref_func_ptr(); + } + + template void RunTest(FuncPtr func_ptr) { + (ref->*func_ptr)(pv_src, pv_dst_ref, src_strd, dst_strd, pi1_coeffs, ht, + wd); + (tst->*func_ptr)(pv_src, pv_dst_tst, src_strd, dst_strd, pi1_coeffs, ht, + wd); + ASSERT_NO_FATAL_FAILURE( + compare_output(dst_buf_ref, 
dst_buf_tst, wd, ht, dst_strd)); + } + + int wd, ht, src_strd_mul, dst_strd_mul, coeff_idx; + int src_strd, dst_strd; + std::vector dst_buf_ref; + std::vector dst_buf_tst; + srcType *pv_src; + dstType *pv_dst_ref; + dstType *pv_dst_tst; + WORD8 *pi1_coeffs; + IVD_ARCH_T arch; + const func_selector_t *tst; + const func_selector_t *ref; +}; + +class LumaInterPred_8_8_Test : public LumaInterPredTest {}; +class LumaInterPred_8_16_Test : public LumaInterPredTest {}; +class LumaInterPred_16_8_Test : public LumaInterPredTest {}; +class LumaInterPred_16_16_Test : public LumaInterPredTest {}; + +TEST_P(LumaInterPred_8_8_Test, LumaCopyTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_copy_fptr); +} + +TEST_P(LumaInterPred_8_8_Test, LumaHorzTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_horz_fptr); +} + +TEST_P(LumaInterPred_8_8_Test, LumaVertTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_vert_fptr); +} + +TEST_P(LumaInterPred_8_16_Test, LumaCopyTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_copy_w16out_fptr); +} + +TEST_P(LumaInterPred_8_16_Test, LumaHorzTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_horz_w16out_fptr); +} + +TEST_P(LumaInterPred_8_16_Test, LumaVertTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_vert_w16out_fptr); +} + +TEST_P(LumaInterPred_16_8_Test, LumaVertTest) { + RunTest(&func_selector_t::ihevc_inter_pred_luma_vert_w16inp_fptr); +} + +TEST_P(LumaInterPred_16_16_Test, LumaVertTest) { +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) + // TODO: SSE4.2 and SSSE3 are not matching C implementation + GTEST_SKIP() << "SSE4.2 and SSSE3 are not matching C implementation for " + "ihevc_inter_pred_luma_vert_w16inp_w16out_fptr"; +#endif + RunTest(&func_selector_t::ihevc_inter_pred_luma_vert_w16inp_w16out_fptr); +} + +auto kLumaInterPredTestParams = + ::testing::Combine(::testing::ValuesIn(kPUBlockSizes), + ::testing::Values(1, 2), // Src Stride Multiplier + 
::testing::Values(1, 2), // Dst Stride Multiplier + ::testing::Values(0, 1, 2, 3), // Coeff index + ::testing::ValuesIn(ga_tst_arch) // arch + ); + +std::string PrintLumaInterPredTestParam( + const testing::TestParamInfo &info) { + int wd, ht, src_strd_mul, dst_strd_mul, coeff_idx; + IVD_ARCH_T arch; + std::pair block_size; + std::tie(block_size, src_strd_mul, dst_strd_mul, coeff_idx, arch) = + info.param; + std::tie(wd, ht) = block_size; + std::stringstream ss; + ss << wd << "x" << ht << "_src_stride_" << src_strd_mul * wd << "_dst_stride_" + << dst_strd_mul * wd << "_coeff_" << coeff_idx << "_" + << get_arch_str(arch); + return ss.str(); +} + +INSTANTIATE_TEST_SUITE_P(LumaCopyTest, LumaInterPred_8_8_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaHorzTest, LumaInterPred_8_8_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaVertTest, LumaInterPred_8_8_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaCopyTest, LumaInterPred_8_16_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaHorzTest, LumaInterPred_8_16_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaVertTest, LumaInterPred_8_16_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaVertTest, LumaInterPred_16_8_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); + +INSTANTIATE_TEST_SUITE_P(LumaVertTest, LumaInterPred_16_16_Test, + kLumaInterPredTestParams, PrintLumaInterPredTestParam); diff --git a/tests/common/ihevc_luma_intra_pred_test.cc b/tests/common/ihevc_luma_intra_pred_test.cc new file mode 100644 index 0000000..1a1b635 --- /dev/null +++ b/tests/common/ihevc_luma_intra_pred_test.cc @@ -0,0 +1,146 @@ +/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems 
Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_intra_pred.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +#include "func_selector.h" +#include "tests_common.h" +// clang-format on + +// Test parameters: block_size, mode, dst_stride_mul, arch +using LumaIntraPredTestParam = std::tuple; + +class LumaIntraPredTest + : public ::testing::TestWithParam { +protected: + void SetUp() override { + std::tie(nt, mode, dst_strd_mul, arch) = GetParam(); + src_strd = 1; // Intra pred reference is usually dense + dst_strd = nt * dst_strd_mul; + + // Reference buffer size: 4 * nt + 1 + int ref_size = 4 * nt + 1; + ref_buf.resize(ref_size); + + // Initialize reference buffer with random data + std::mt19937 rng(12345); + std::uniform_int_distribution dist(0, 255); + for (auto &v : ref_buf) { + v = static_cast(dist(rng)); + } + + // Use a pointer aligned or offset to ensure we have valid data + // The function expects pu1_ref to point to the start of the reference array + // Top-left is usually at index 2*nt. + // We just pass the data pointer. 
+ pu1_ref = ref_buf.data(); + + dst_buf_ref.resize(dst_strd * nt); + dst_buf_tst.resize(dst_strd * nt); + + // Initialize dst buffers with pattern to detect over/under writes + std::fill(dst_buf_ref.begin(), dst_buf_ref.end(), 0xCD); + std::fill(dst_buf_tst.begin(), dst_buf_tst.end(), 0xCD); + + pu1_dst_ref = dst_buf_ref.data(); + pu1_dst_tst = dst_buf_tst.data(); + + tst = get_tst_func_ptr(arch); + ref = get_ref_func_ptr(); + } + + template void RunTest(FuncPtr func_ptr) { + (ref->*func_ptr)(pu1_ref, src_strd, pu1_dst_ref, dst_strd, nt, mode); + (tst->*func_ptr)(pu1_ref, src_strd, pu1_dst_tst, dst_strd, nt, mode); + ASSERT_NO_FATAL_FAILURE( + compare_output(dst_buf_ref, dst_buf_tst, nt, nt, dst_strd)); + } + + int nt, mode, dst_strd_mul; + int src_strd, dst_strd; + std::vector ref_buf; + std::vector dst_buf_ref; + std::vector dst_buf_tst; + UWORD8 *pu1_ref; + UWORD8 *pu1_dst_ref; + UWORD8 *pu1_dst_tst; + IVD_ARCH_T arch; + const func_selector_t *tst; + const func_selector_t *ref; +}; + +TEST_P(LumaIntraPredTest, Run) { + if (mode == 0) + RunTest(&func_selector_t::ihevc_intra_pred_luma_planar_fptr); + else if (mode == 1) + RunTest(&func_selector_t::ihevc_intra_pred_luma_dc_fptr); + else if (mode == 2) + RunTest(&func_selector_t::ihevc_intra_pred_luma_mode2_fptr); + else if (mode >= 3 && mode <= 9) + RunTest(&func_selector_t::ihevc_intra_pred_luma_mode_3_to_9_fptr); + else if (mode == 10) { + GTEST_SKIP() << "SIMD implementation is not matching C implementation for " + "ihevc_intra_pred_luma_horz_fptr"; + RunTest(&func_selector_t::ihevc_intra_pred_luma_horz_fptr); + } else if (mode >= 11 && mode <= 17) + RunTest(&func_selector_t::ihevc_intra_pred_luma_mode_11_to_17_fptr); + else if (mode == 18 || mode == 34) + RunTest(&func_selector_t::ihevc_intra_pred_luma_mode_18_34_fptr); + else if (mode >= 19 && mode <= 25) + RunTest(&func_selector_t::ihevc_intra_pred_luma_mode_19_to_25_fptr); + else if (mode == 26) { + GTEST_SKIP() << "SIMD implementation is not matching C 
implementation for " + "ihevc_intra_pred_luma_ver_fptr"; + RunTest(&func_selector_t::ihevc_intra_pred_luma_ver_fptr); + } else if (mode >= 27 && mode <= 33) + RunTest(&func_selector_t::ihevc_intra_pred_luma_mode_27_to_33_fptr); + else + FAIL() << "Invalid mode: " << mode; +} + +std::string PrintLumaIntraPredTestParam( + const testing::TestParamInfo &info) { + int nt, mode, dst_strd_mul; + IVD_ARCH_T arch; + std::tie(nt, mode, dst_strd_mul, arch) = info.param; + std::stringstream ss; + ss << "nt_" << nt << "_mode_" << mode << "_dst_stride_" << nt * dst_strd_mul + << "_" << get_arch_str(arch); + return ss.str(); +} + +INSTANTIATE_TEST_SUITE_P( + LumaIntraPred, LumaIntraPredTest, + ::testing::Combine(::testing::Values(4, 8, 16, 32), ::testing::Range(0, 35), + ::testing::Values(1, 2), // Dst Stride Multiplier + ::testing::ValuesIn(ga_tst_arch)), + PrintLumaIntraPredTestParam); diff --git a/tests/common/tests_common.cc b/tests/common/tests_common.cc new file mode 100644 index 0000000..b840157 --- /dev/null +++ b/tests/common/tests_common.cc @@ -0,0 +1,99 @@ +/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_inter_pred.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +#include "tests_common.h" +// clang-format on + +const std::vector> kPUBlockSizes = { + // clang-format off + {4, 4}, + {8, 8}, {8, 4}, {4, 8}, + {16, 16}, {16, 12}, {16, 8}, {16, 4}, {12, 16}, {8, 16}, {4, 16}, + {32, 32}, {32, 24}, {32, 16}, {32, 8}, {24, 32}, {16, 32}, {8, 32}, + {64, 64}, {64, 48}, {64, 32}, {64, 16}, {48, 64}, {32, 64}, {16, 64}, + // clang-format on +}; + +const std::vector g_src8_buf = []() { + // allocate twice to account for WORD16 as well + std::vector buf(kMaxSize * kMaxHeight * 2); + std::mt19937 rng(12345); + std::uniform_int_distribution dist(0, 255); + for (auto &v : buf) + v = static_cast(dist(rng)); + return buf; +}(); + +std::string get_arch_str(IVD_ARCH_T arch) { + std::string arch_str; + switch (arch) { + case ARCH_X86_GENERIC: + arch_str = "GENERIC"; + break; + case ARCH_X86_SSSE3: + arch_str = "SSSE3"; + break; + case ARCH_X86_SSE42: + arch_str = "SSE42"; + break; + case ARCH_X86_AVX2: + arch_str = "AVX2"; + break; + case ARCH_ARMV8_GENERIC: + arch_str = "ARMV8"; + break; + case ARCH_ARM_A9Q: + arch_str = "A9Q"; + break; + default: + arch_str = "UNKNOWN"; + break; + } + return arch_str; +} + +const std::vector ga_tst_arch = { +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) + ARCH_X86_SSSE3, + ARCH_X86_SSE42, +#ifndef DISABLE_AVX2 + ARCH_X86_AVX2, +#endif // DISABLE_AVX2 +#elif defined(__aarch64__) + ARCH_ARMV8_GENERIC, +#elif defined(__arm__) + ARCH_ARM_A9Q, +#endif +}; \ No newline at end of file diff --git a/tests/common/tests_common.h b/tests/common/tests_common.h new file mode 100644 index 0000000..bd31278 --- /dev/null +++ b/tests/common/tests_common.h 
@@ -0,0 +1,61 @@ +/****************************************************************************** + * + * Copyright (C) 2026 Ittiam Systems Pvt Ltd, Bangalore + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ +#ifndef __TESTS_COMMON_H__ +#define __TESTS_COMMON_H__ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include "ihevc_typedefs.h" +#include "ihevc_inter_pred.h" +#include "ihevcd_function_selector.h" +#include "iv.h" +#include "ivd.h" +// clang-format on + +static constexpr int kMaxSize = 64; +static constexpr int kTapSize = 8; +static constexpr int kMaxHeight = kMaxSize + kTapSize; + +extern const std::vector> kPUBlockSizes; +extern const std::vector g_src8_buf; +extern const std::vector ga_tst_arch; + +// Compare outputs +template +static void compare_output(const std::vector &ref, + const std::vector &test, int wd, int ht, + int dst_strd) { + int size_bytes = wd * sizeof(T); + for (int i = 0; i < ht; ++i) { + int cmp = memcmp(ref.data() + i * dst_strd, test.data() + i * dst_strd, + size_bytes); + ASSERT_EQ(0, cmp) << "Mismatch at row " << i << " for size " << wd << "x" + << ht; + } +} + +std::string get_arch_str(IVD_ARCH_T arch); +#endif /* __TESTS_COMMON_H__ */ \ No newline at end of file