Compare commits

..

No commits in common. "main" and "v1.5.1" have entirely different histories.
main ... v1.5.1

161 changed files with 3807 additions and 10452 deletions

View file

@ -20,7 +20,7 @@ jobs:
language: c++
fuzz-seconds: 600
- name: Upload Crash
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts

View file

@ -2,100 +2,22 @@ name: CMake
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
env:
BUILD_TYPE: Release
jobs:
build:
strategy:
matrix:
include:
- name: ubuntu-latest-gcc-cmake
os: ubuntu-latest
cc: gcc
cxx: g++
build-system: cmake
cmake-opts: ''
- name: ubuntu-latest-clang-cmake
os: ubuntu-latest
cc: clang
cxx: clang++
build-system: cmake
cmake-opts: ''
- name: ubuntu-24.04-arm-clang-cmake
os: ubuntu-24.04-arm
cc: clang
cxx: clang++
build-system: cmake
cmake-opts: ''
- name: ubuntu-latest-clang-cmake-asan-fuzzer
os: ubuntu-latest
cc: clang
cxx: clang++
build-system: cmake
cmake-opts: '-DSANITIZE=fuzzer-no-link,address'
- name: ubuntu-latest-clang-cmake-ninja
os: ubuntu-latest
cc: clang
cxx: clang++
build-system: cmake
cmake-opts: '-G Ninja'
- name: macos-latest-clang-cmake
os: macos-latest
cc: clang
cxx: clang++
build-system: cmake
cmake-opts: ''
- name: ubuntu-latest-cross-aarch64-cmake
os: ubuntu-latest
cc: aarch64-linux-gnu-gcc
cxx: aarch64-linux-gnu-g++
build-system: cmake
cmake-opts: '-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64_toolchain.cmake'
- name: ubuntu-latest-cross-aarch32-cmake
os: ubuntu-latest
cc: arm-linux-gnueabihf-gcc
cxx: arm-linux-gnueabihf-g++
build-system: cmake
cmake-opts: '-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch32_toolchain.cmake'
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Linux dependencies
if: startsWith(matrix.os,'ubuntu') && contains(matrix.cmake-opts,'-G Ninja')
run: |
sudo apt-get update
sudo apt-get install -y ninja-build
- name: Install cross-aarch64 dependencies
if: startsWith(matrix.os,'ubuntu') && contains(matrix.cmake-opts,'aarch64')
run: |
sudo apt-get update
sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install cross-arm dependencies
if: startsWith(matrix.os,'ubuntu') && contains(matrix.cmake-opts,'aarch32')
run: |
sudo apt-get update
sudo apt-get install gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf
- uses: actions/checkout@v3
- name: Configure CMake
env:
CC: ${{ matrix.cc }}
CXX: ${{ matrix.cxx }}
run: cmake -B ${{github.workspace}}/out -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} ${{ matrix.cmake-opts }}
run: cmake -B ${{github.workspace}}/out -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
- name: Build
run: cmake --build ${{github.workspace}}/out --config ${{env.BUILD_TYPE}}

View file

@ -1,18 +0,0 @@
{
"configurations": [
{
"name": "Native",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/common",
"${workspaceFolder}/decoder",
"${workspaceFolder}/encoder"
],
"defines": [],
"cStandard": "c17",
"cppStandard": "c++17",
"configurationProvider": "ms-vscode.cmake-tools"
}
],
"version": 4
}

81
.vscode/launch.json vendored
View file

@ -1,81 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Run hevcenc - Linux",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/hevcenc",
"args": ["../test/encoder/vid_enc_cfg.txt"],
"stopAtEntry": false,
"cwd": "${workspaceFolder}/build",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
},
{
"name": "Run hevcdec - Linux",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/hevcdec",
"args": ["../test/decoder/test.cfg"],
"stopAtEntry": false,
"cwd": "${workspaceFolder}/build",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
},
{
"name": "Run hevcenc - Mac",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/hevcenc",
"args": ["../test/encoder/vid_enc_cfg.txt"],
"stopAtEntry": false,
"cwd": "${workspaceFolder}/build",
"environment": [],
"externalConsole": false,
"MIMode": "lldb",
"setupCommands": [
{
"description": "Enable pretty-printing for lldb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
},
{
"name": "Run hevcdec - Mac",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/hevcdec",
"args": ["../test/decoder/test.cfg"],
"stopAtEntry": false,
"cwd": "${workspaceFolder}/build",
"environment": [],
"externalConsole": false,
"MIMode": "lldb",
"setupCommands": [
{
"description": "Enable pretty-printing for lldb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
}
]
}

19
.vscode/settings.json vendored
View file

@ -1,19 +0,0 @@
{
"cmake.buildDirectory": "${workspaceFolder}/build",
"cmake.sourceDirectory": "${workspaceFolder}",
"cmake.configureArgs": [
"-DENABLE_MVC=OFF",
"-DENABLE_SVC=OFF",
"-DENABLE_TESTS=OFF",
"-DCMAKE_C_COMPILER=clang",
"-DCMAKE_CXX_COMPILER=clang++"
],
"cmake.preferredGenerators": [
"Unix Makefiles"
],
"cmake.debugConfig": {
"hevcenc": "hevcenc",
"hevcdec": "hevcdec"
},
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
}

29
.vscode/tasks.json vendored
View file

@ -1,29 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "cmake",
"label": "Configure",
"command": "configure",
"problemMatcher": [
"$gcc"
],
"group": "build"
},
{
"type": "cmake",
"label": "Build",
"command": "build",
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"dependsOn": [
"Configure"
]
}
]
}

View file

@ -41,7 +41,6 @@ cc_library_static {
cflags: [
"-D_LIB",
"-fPIC",
"-DENABLE_MAIN_REXT_PROFILE",
"-O3",
"-DANDROID",
@ -51,6 +50,8 @@ cc_library_static {
// common/x86/ihevc_sao_ssse3_intr.c: implicit conversion from
// 'int' to 'char' changes value from 128 to -128
"-Wno-error=constant-conversion",
// KEEP_THREADS_ACTIVE is experimental
"-UKEEP_THREADS_ACTIVE",
],
export_include_dirs: [
@ -65,7 +66,6 @@ cc_library_static {
"common/ihevc_chroma_iquant_itrans_recon.c",
"common/ihevc_chroma_iquant_recon.c",
"common/ihevc_chroma_itrans_recon.c",
"common/ihevc_chroma_itrans_recon_32x32.c",
"common/ihevc_chroma_itrans_recon_16x16.c",
"common/ihevc_chroma_itrans_recon_8x8.c",
"common/ihevc_chroma_recon.c",
@ -79,7 +79,6 @@ cc_library_static {
"common/ihevc_iquant_itrans_recon.c",
"common/ihevc_iquant_recon.c",
"common/ihevc_itrans.c",
"common/ihevc_itrans_res.c",
"common/ihevc_itrans_recon.c",
"common/ihevc_itrans_recon_16x16.c",
"common/ihevc_itrans_recon_32x32.c",
@ -205,6 +204,7 @@ cc_library_static {
"decoder/arm/ihevcd_function_selector_noneon.c",
"decoder/arm64/ihevcd_fmt_conv_420sp_to_420p.s",
"decoder/arm64/ihevcd_fmt_conv_420sp_to_420sp.s",
"decoder/arm64/ihevcd_fmt_conv_420sp_to_rgba8888.s",
"decoder/arm64/ihevcd_function_selector_av8.c",
"decoder/arm64/ihevcd_itrans_recon_dc_chroma.s",
"decoder/arm64/ihevcd_itrans_recon_dc_luma.s",
@ -284,6 +284,7 @@ cc_library_static {
"common/arm/ihevc_weighted_pred_uni.s",
"decoder/arm/ihevcd_fmt_conv_420sp_to_420p.s",
"decoder/arm/ihevcd_fmt_conv_420sp_to_420sp.s",
"decoder/arm/ihevcd_fmt_conv_420sp_to_rgba8888.s",
"decoder/arm/ihevcd_function_selector_a9q.c",
"decoder/arm/ihevcd_itrans_recon_dc_chroma.s",
"decoder/arm/ihevcd_itrans_recon_dc_luma.s",
@ -418,7 +419,7 @@ cc_library_static {
min_sdk_version: "29",
}
cc_binary {
cc_test {
name: "hevcdec",
host_supported: true,
cflags: [
@ -429,6 +430,7 @@ cc_binary {
"-Wall",
"-Werror",
],
gtest: false,
srcs: ["test/decoder/main.c"],
static_libs: ["libhevcdec"],
target: {
@ -461,7 +463,6 @@ cc_library_static {
"common/ihevc_cabac_tables.c",
"common/ihevc_chroma_intra_pred_filters.c",
"common/ihevc_chroma_itrans_recon.c",
"common/ihevc_chroma_itrans_recon_32x32.c",
"common/ihevc_chroma_itrans_recon_16x16.c",
"common/ihevc_chroma_itrans_recon_8x8.c",
"common/ihevc_common_tables.c",
@ -804,7 +805,7 @@ cc_library_static {
min_sdk_version: "29",
}
cc_binary {
cc_test {
name: "hevcenc",
host_supported: true,
cflags: [
@ -813,6 +814,7 @@ cc_binary {
"-Wall",
"-Werror",
],
gtest: false,
srcs: ["test/encoder/main.c"],
static_libs: ["libhevcenc"],
sanitize: {

View file

@ -1,17 +1,6 @@
cmake_minimum_required(VERSION 3.9.1)
project(libhevc C CXX)
if(NOT DEFINED SYSTEM_NAME)
set(SYSTEM_NAME ${CMAKE_HOST_SYSTEM_NAME})
endif()
if(NOT DEFINED SYSTEM_PROCESSOR)
set(SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif()
if(NOT "${SYSTEM_NAME}" STREQUAL "Darwin")
enable_language(ASM)
endif()
enable_language(ASM)
set(HEVC_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
set(HEVC_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
@ -42,7 +31,6 @@ endif()
include("${HEVC_ROOT}/cmake/utils.cmake")
libhevc_add_compile_options()
libhevc_add_gtest()
libhevc_add_definitions()
libhevc_set_link_libraries()
@ -55,5 +43,3 @@ include("${HEVC_ROOT}/test/encoder/hevcenc.cmake")
include("${HEVC_ROOT}/fuzzer/hevc_dec_fuzzer.cmake")
include("${HEVC_ROOT}/fuzzer/hevc_enc_fuzzer.cmake")
include("${HEVC_ROOT}/tests/common/common.cmake")

View file

@ -7,8 +7,6 @@ Supports:
- aarch32/aarch64 on Linux.
- aarch32/aarch64 on Android.
- x86_32/x86_64 on Linux.
- aarch64 on Mac.
- x86_64 on Mac.
## Native Builds
Use the following commands for building on the target machine
@ -53,29 +51,3 @@ $ make
$ cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch32_toolchain.cmake
$ make
```
### Building for Android
NOTE: This assumes that you are building on a machine that has
[Android NDK](https://developer.android.com/ndk/downloads).
```
$ cd external/libhevc
$ mkdir build
$ cd build
```
#### Armv7 (32-bit)
cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/android_toolchain.cmake\
-DHEVC_ANDROID_NDK_PATH=/opt/android-ndk-r26d/\
-DANDROID_ABI=armeabi-v7a\
-DANDROID_PLATFORM=android-23 ../
make
#### Armv8 (64-bit)
cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/android_toolchain.cmake\
-DHEVC_ANDROID_NDK_PATH=/opt/android-ndk-r26d/\
-DANDROID_ABI=arm64-v8a\
-DANDROID_PLATFORM=android-23 ../
make

View file

@ -1,10 +1,7 @@
set(SYSTEM_NAME Linux)
set(SYSTEM_PROCESSOR aarch32)
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch32)
# Modify these variables with paths to appropriate compilers that can produce
# armv7 targets
set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
# Build all binaries as static, so that they can be run using qemu
set(CMAKE_EXE_LINKER_FLAGS "-static")

View file

@ -1,5 +1,5 @@
set(SYSTEM_NAME Linux)
set(SYSTEM_PROCESSOR aarch64)
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
# Modify these variables with paths to appropriate compilers that can produce
# armv8 targets
@ -11,6 +11,3 @@ set(CMAKE_C_COMPILER_AR
set(CMAKE_CXX_COMPILER_AR
aarch64-linux-gnu-gcc-ar
CACHE FILEPATH "Archiver")
# Build all binaries as static, so that they can be run using qemu
set(CMAKE_EXE_LINKER_FLAGS "-static")

View file

@ -1,34 +0,0 @@
# Toolchain file for building libhevc with the Android NDK.
#
# Inputs:
#   ANDROID_PLATFORM       optional, defaults to android-23
#   ANDROID_ABI            optional, defaults to arm64-v8a
#   HEVC_ANDROID_NDK_PATH  required, as a CMake variable or an environment
#                          variable; configuration aborts if neither is set
set(SYSTEM_NAME Android)
set(CMAKE_SYSTEM_NAME Android)

if(NOT ANDROID_PLATFORM)
  set(ANDROID_PLATFORM android-23)
endif()

# Choose target architecture with:
# -DANDROID_ABI={armeabi-v7a, arm64-v8a, x86, x86_64}
if(NOT ANDROID_ABI)
  set(ANDROID_ABI arm64-v8a)
endif()

# ABIs starting with "armeabi" select aarch32; every other ABI selects aarch64.
# NOTE(review): this means the x86 and x86_64 ABIs listed above also end up
# with SYSTEM_PROCESSOR=aarch64 - confirm whether that is intended.
if(ANDROID_ABI MATCHES "^armeabi")
  set(SYSTEM_PROCESSOR aarch32)
else()
  set(SYSTEM_PROCESSOR aarch64)
endif()

# Toolchain files don't have access to cached variables:
# https://gitlab.kitware.com/cmake/cmake/issues/16170. Set an intermediate
# environment variable when loaded the first time.
if(HEVC_ANDROID_NDK_PATH)
  set(ENV{HEVC_ANDROID_NDK_PATH} "${HEVC_ANDROID_NDK_PATH}")
else()
  set(HEVC_ANDROID_NDK_PATH "$ENV{HEVC_ANDROID_NDK_PATH}")
endif()

# message(FATAL_ERROR ...) stops processing, so no return() is needed after it.
if(NOT HEVC_ANDROID_NDK_PATH)
  message(FATAL_ERROR "HEVC_ANDROID_NDK_PATH not set.")
endif()

# Hand over to the toolchain file shipped with the NDK.
include("${HEVC_ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake")

View file

@ -2,9 +2,9 @@ include(CheckCXXCompilerFlag)
# Adds compiler options for all targets
function(libhevc_add_compile_options)
if("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "arm64")
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
add_compile_options(-march=armv8-a)
elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch32")
add_compile_options(-march=armv7-a -mfpu=neon)
else()
add_compile_options(-msse4.2 -mno-avx)
@ -32,15 +32,9 @@ endfunction()
# Adds definitions for all targets
function(libhevc_add_definitions)
if("${SYSTEM_NAME}" STREQUAL "Darwin")
if("${SYSTEM_PROCESSOR}" STREQUAL "arm64")
add_definitions(-DARMV8 -DDARWIN -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC)
else()
add_definitions(-DX86 -DDARWIN -DDISABLE_AVX2 -DDEFAULT_ARCH=D_ARCH_X86_GENERIC)
endif()
elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch64")
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
add_definitions(-DARMV8 -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC -DENABLE_NEON)
elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch32")
add_definitions(-DARMV7 -DDEFAULT_ARCH=D_ARCH_ARM_A9Q -DENABLE_NEON
-DDISABLE_NEONINTR)
else()
@ -118,35 +112,3 @@ endfunction()
# Declares a fuzzer target: forwards NAME and LIB to libhevc_add_executable()
# together with the extra arguments "FUZZER 1".
# NOTE(review): ${ARGV} holds every argument of this function, including NAME
# and LIB, so both are forwarded a second time after "FUZZER 1". ${ARGN} (only
# the arguments after LIB) looks like what was intended - confirm against
# libhevc_add_executable() before changing.
function(libhevc_add_fuzzer NAME LIB)
libhevc_add_executable(${NAME} ${LIB} FUZZER 1 ${ARGV})
endfunction()
# Adds GoogleTest and Threads dependency
#
# Fetches a pinned GoogleTest commit archive with FetchContent and adds it to
# the build. Nothing in this function references Threads directly; presumably
# it is pulled in by GoogleTest's own build - TODO confirm.
function(libhevc_add_gtest)
  # FetchContent_MakeAvailable() only exists from CMake 3.14 onwards. Fail
  # early with a clear message instead of an "unknown command" error.
  if(CMAKE_VERSION VERSION_LESS 3.14)
    message(FATAL_ERROR
            "libhevc_add_gtest requires CMake 3.14 or newer (found ${CMAKE_VERSION}).")
  endif()

  include(FetchContent)
  FetchContent_Declare(
    googletest
    URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
  )
  # For Windows: Prevent overriding the parent project's compiler/linker settings
  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
  FetchContent_MakeAvailable(googletest)
endfunction()
# cmake-format: off
# Adds a target for a gtest executable
#
# Arguments:
# NAME: Name of the executable
#
# Optional Arguments:
# SOURCES: Additional source files
#
# The target is created through libhevc_add_executable() with libhevcdec as
# the library argument. The two shared sources under tests/common are always
# passed, followed by the caller's SOURCES, and GTest::gtest_main is passed
# under the LIBS keyword.
# cmake-format: on
function(libhevc_add_gtest_executable NAME)
# SOURCES is the only multi-value keyword; its values land in ARG_SOURCES
set(multi_value_args SOURCES)
cmake_parse_arguments(ARG "" "" "${multi_value_args}" ${ARGN})
libhevc_add_executable(
${NAME} libhevcdec
SOURCES ${HEVC_ROOT}/tests/common/func_selector.cc
${HEVC_ROOT}/tests/common/tests_common.cc ${ARG_SOURCES}
LIBS GTest::gtest_main)
endfunction()

View file

@ -438,11 +438,6 @@ void ihevc_intra_pred_luma_ref_substitution_neonintr(UWORD8 *pu1_top_left,
* @param[in] mode
* integer intraprediction mode
*
* @param[in] intra_smoothing_flags
* integer bit 3 indicates if intra smoothing is enabled/disabled
* unconditionally. this is applicable to frext profiles only
* bit 0 indicates strong intra smoothing enabled/disabled
*
* @returns
*
* @remarks
@ -456,7 +451,7 @@ void ihevc_intra_pred_ref_filtering_neonintr(UWORD8 *pu1_src,
WORD32 nt,
UWORD8 *pu1_dst,
WORD32 mode,
WORD32 intra_smoothing_flags)
WORD32 strong_intra_smoothing_enable_flag)
{
WORD32 filter_flag;
WORD32 i = 0;
@ -480,12 +475,10 @@ void ihevc_intra_pred_ref_filtering_neonintr(UWORD8 *pu1_src,
WORD32 abs_cond_left_flag = 0;
WORD32 abs_cond_top_flag = 0;
WORD32 dc_val = 1 << (BIT_DEPTH - 5);
WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3);
WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1;
shift_res = vdup_n_u8(0);
filter_flag = intra_smoothing_disabled ?
0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)));
filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2));
if(0 == filter_flag)
{
if(pu1_src == pu1_dst)

View file

@ -103,8 +103,7 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
WORD32 nt,
WORD32 nbr_flags,
UWORD8 *pu1_dst,
WORD32 dst_strd,
WORD32 chroma_format_idc)
WORD32 dst_strd)
{
UWORD8 pu1_ref_u, pu1_ref_v;
WORD32 dc_val, i, j;
@ -181,7 +180,7 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
// U-V interleaved Top-top right samples
}
if(nt == 4 || (nt == 8 && chroma_format_idc == CHROMA_FMT_IDC_YUV444))
if(nt == 4)
{
/* 1 bit extraction for all the neighboring blocks */
tp_left = (nbr_flags & 0x10000) >> 16;
@ -249,9 +248,8 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
}
}
else if(nt == 8 || (nt == 16 && chroma_format_idc == CHROMA_FMT_IDC_YUV444))
else if(nt == 8)
{
WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1;
WORD32 nbr_flags_temp = 0;
nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
+ ((nbr_flags & 0x300) >> 4)
@ -261,16 +259,16 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
/* compute trailing zeros based on nbr_flag for substitution process of below left see section .*/
/* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
{
nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * (4 * sub_sample); /* for bottom left and left */
if(nbr_id_from_bl == 32 * sub_sample)
nbr_id_from_bl = 16 * sub_sample;
if(nbr_id_from_bl == 16 * sub_sample)
nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */
if(nbr_id_from_bl == 32)
nbr_id_from_bl = 16;
if(nbr_id_from_bl == 16)
{
/* for top left : 1 pel per nbr bit */
if(!((nbr_flags_temp >> 8) & 0x1))
{
nbr_id_from_bl++;
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4 * sub_sample; /* top and top right; 8 pels per nbr bit */
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right; 8 pels per nbr bit */
}
}
@ -289,14 +287,14 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
}
/* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
while(nbr_id_from_bl < ((T8C_4NT * sub_sample)+1))
while(nbr_id_from_bl < ((T8C_4NT)+1))
{
/* To Obtain the next unavailable idx flag after reverse neighbor substitution */
/* Divide by 8 to obtain the original index */
frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/
frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
/* The Top-left flag is at the last bit location of nbr_flags*/
if(nbr_id_from_bl == (T8C_4NT * sub_sample / 2))
if(nbr_id_from_bl == (T8C_4NT / 2))
{
get_bits = GET_BIT(nbr_flags_temp, 8);
@ -315,23 +313,22 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
UWORD16 *pu2_dst;
/* 8 pel substitution (other than TL) */
pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample);
ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
}
}
nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT * sub_sample / 2)) ? 1 : 4 * sub_sample;
nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4;
}
}
else if(nt == 16 || (nt == 32 && chroma_format_idc == CHROMA_FMT_IDC_YUV444))
else if(nt == 16)
{
WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1;
/* compute trailing ones based on mbr_flag for substitution process of below left see section .*/
/* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */
{
nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4 * sub_sample; /* for bottom left and left */
nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */
if(nbr_id_from_bl == 32 * sub_sample)
if(nbr_id_from_bl == 32)
{
/* for top left : 1 pel per nbr bit */
if(!((nbr_flags >> 16) & 0x1))
@ -339,7 +336,7 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
/* top left not available */
nbr_id_from_bl++;
/* top and top right; 4 pels per nbr bit */
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4 * sub_sample;
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4;
}
}
/* Reverse Substitution Process*/
@ -357,14 +354,14 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
}
/* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
while(nbr_id_from_bl < ((T16C_4NT * sub_sample)+1))
while(nbr_id_from_bl < ((T16C_4NT)+1))
{
/* To Obtain the next unavailable idx flag after reverse neighbor substitution */
/* Divide by 4 to obtain the original index */
frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/
frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
/* The Top-left flag is at the last bit location of nbr_flags*/
if(nbr_id_from_bl == (T16C_4NT * sub_sample / 2))
if(nbr_id_from_bl == (T16C_4NT / 2))
{
get_bits = GET_BIT(nbr_flags, 16);
/* only pel substitution for TL */
@ -382,11 +379,11 @@ void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
UWORD16 *pu2_dst;
/* 4 pel substitution (other than TL) */
pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample);
ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
}
}
nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT * sub_sample / 2)) ? 1 : 4 * sub_sample;
nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4;
}
}
}

View file

@ -45,8 +45,9 @@
// WORD32 filter_flag_q)
//
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -56,7 +57,7 @@
.type ihevc_deblk_chroma_horz_av8, %function
ENTRY ihevc_deblk_chroma_horz_av8
ihevc_deblk_chroma_horz_av8:
sxtw x4,w4
sxtw x5,w5
sxtw x6,w6
@ -165,7 +166,6 @@ l1.3528:
l1.3540:
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -46,8 +46,9 @@
// WORD32 filter_flag_p,
// WORD32 filter_flag_q)
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -57,7 +58,7 @@
.type ihevc_deblk_chroma_vert_av8, %function
ENTRY ihevc_deblk_chroma_vert_av8
ihevc_deblk_chroma_vert_av8:
sxtw x4,w4
sxtw x5,w5
sxtw x6,w6
@ -204,7 +205,6 @@ l1.3204:
l1.3228:
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -36,8 +36,8 @@
//*
//*******************************************************************************/
.include "ihevc_neon_macros.s"
.text
.align 4
.extern gai4_ihevc_tc_table
@ -46,7 +46,7 @@
.type ihevc_deblk_luma_horz_av8, %function
ENTRY ihevc_deblk_luma_horz_av8
ihevc_deblk_luma_horz_av8:
// stmfd sp!, {x3-x12,x14}
sxtw x5,w5
sxtw x6,w6
@ -434,7 +434,6 @@ l1.2404:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16 // Loading d9 using { ldr d9,[sp]; add sp,sp,#8 } is giving bus error.
// d8 is used as dummy register and loaded along with d9 using ldp. d8 is not used in the function.
EXIT_FUNC
ret
// x4=flag p
@ -585,7 +584,6 @@ l1.2852:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16 // Loading d9 using { ldr d9,[sp]; add sp,sp,#8 } is giving bus error.
// d8 is used as dummy register and loaded along with d9 using ldp. d8 is not used in the function.
EXIT_FUNC
ret

View file

@ -37,8 +37,8 @@
//*
//*******************************************************************************/
.include "ihevc_neon_macros.s"
.text
.align 4
@ -49,7 +49,7 @@
.type ihevc_deblk_luma_vert_av8, %function
ENTRY ihevc_deblk_luma_vert_av8
ihevc_deblk_luma_vert_av8:
sxtw x5,w5
sxtw x6,w6
@ -450,7 +450,6 @@ l1.964:
ldp d12,d13,[sp],#16
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
EXIT_FUNC
ret
l1.968:
@ -631,7 +630,6 @@ l1.1412:
ldp d12,d13,[sp],#16
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
EXIT_FUNC
ret

View file

@ -91,14 +91,14 @@
//x5 => ht
//x6 => wd
.include "ihevc_neon_macros.s"
.text
.align 4
.globl ihevc_inter_pred_chroma_copy_av8
.type ihevc_inter_pred_chroma_copy_av8, %function
ENTRY ihevc_inter_pred_chroma_copy_av8
ihevc_inter_pred_chroma_copy_av8:
LSL x12,x6,#1 //wd << 1
CMP x5,#0 //checks ht == 0
@ -142,8 +142,7 @@ END_INNER_LOOP_WD_4:
BGT OUTER_LOOP_WD_4_HT_2
END_LOOPS:
EXIT_FUNC
ret
RET
OUTER_LOOP_WD_4_HT_2:
SUBS x4,x12,#0 //checks wd == 0
@ -252,7 +251,6 @@ INNER_LOOP_WD_16_HT_2:
LD1 {v1.16b},[x7],x2 //vld1_u8(pu1_src_tmp)
ST1 {v1.16b},[x6],x3 //vst1_u8(pu1_dst_tmp, tmp_src)
EXIT_FUNC
ret
RET

View file

@ -92,14 +92,16 @@
//x5 => ht
//x6 => wd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_inter_pred_chroma_copy_w16out_av8
.type ihevc_inter_pred_chroma_copy_w16out_av8, %function
ENTRY ihevc_inter_pred_chroma_copy_w16out_av8
ihevc_inter_pred_chroma_copy_w16out_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -171,7 +173,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
@ -338,7 +339,6 @@ core_loop_wd_8_ht_2:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -93,15 +93,16 @@
//x2 => src_strd
//x3 => dst_strd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_chroma_horz_av8
.type ihevc_inter_pred_chroma_horz_av8, %function
ENTRY ihevc_inter_pred_chroma_horz_av8
ihevc_inter_pred_chroma_horz_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -768,7 +769,6 @@ end_loops:
ldp d13,d14,[sp],#16
ldp d11,d12,[sp],#16
ldp d9,d10,[sp],#16
EXIT_FUNC
ret

View file

@ -91,16 +91,17 @@
//x3 => dst_strd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_chroma_horz_w16out_av8
.type ihevc_inter_pred_chroma_horz_w16out_av8, %function
ENTRY ihevc_inter_pred_chroma_horz_w16out_av8
ihevc_inter_pred_chroma_horz_w16out_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -793,7 +794,6 @@ end_loops:
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
ldp d10,d11,[sp],#16
EXIT_FUNC
ret

View file

@ -92,15 +92,16 @@
//x1 => *pi2_dst
//x2 => src_strd
//x3 => dst_strd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_chroma_vert_av8
.type ihevc_inter_pred_chroma_vert_av8, %function
ENTRY ihevc_inter_pred_chroma_vert_av8
ihevc_inter_pred_chroma_vert_av8:
// stmfd sp!,{x4-x12,x14} //stack stores the values of the arguments
@ -398,7 +399,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -92,14 +92,16 @@
//x2 => src_strd
//x3 => dst_strd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_inter_pred_chroma_vert_w16inp_av8
.type ihevc_inter_pred_chroma_vert_w16inp_av8, %function
ENTRY ihevc_inter_pred_chroma_vert_w16inp_av8
ihevc_inter_pred_chroma_vert_w16inp_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -347,7 +349,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -92,15 +92,16 @@
//x1 => *pi2_dst
//x2 => src_strd
//x3 => dst_strd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_chroma_vert_w16inp_w16out_av8
.type ihevc_inter_pred_chroma_vert_w16inp_w16out_av8, %function
ENTRY ihevc_inter_pred_chroma_vert_w16inp_w16out_av8
ihevc_inter_pred_chroma_vert_w16inp_w16out_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -335,7 +336,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -93,14 +93,16 @@
//x2 => src_strd
//x3 => dst_strd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_inter_pred_chroma_vert_w16out_av8
.type ihevc_inter_pred_chroma_vert_w16out_av8, %function
ENTRY ihevc_inter_pred_chroma_vert_w16out_av8
ihevc_inter_pred_chroma_vert_w16out_av8:
// stmfd sp!,{x4-x12,x14} //stack stores the values of the arguments
@ -384,7 +386,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -103,15 +103,16 @@
// x5 => ht
// x6 => wd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_luma_horz_av8
.type ihevc_inter_pred_luma_horz_av8, %function
ENTRY ihevc_inter_pred_luma_horz_av8
ihevc_inter_pred_luma_horz_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
push_v_regs
@ -285,7 +286,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret
@ -481,7 +481,6 @@ end_loops1:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret
@ -596,7 +595,6 @@ end_inner_loop_4:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret

View file

@ -103,15 +103,16 @@
// x12 => *pi1_coeff
// x5 => ht
// x3 => wd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_luma_vert_av8
.type ihevc_inter_pred_luma_vert_av8, %function
ENTRY ihevc_inter_pred_luma_vert_av8
ihevc_inter_pred_luma_vert_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -427,7 +428,6 @@ end_loops:
bne lbl409
ldp x19, x20,[sp], #16
EXIT_FUNC
ret
lbl409:
mov x5, #4
@ -518,6 +518,5 @@ end_inner_loop_wd_4:
// ldmfd sp!, {x4-x12, x15} //reload the registers from sp
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -94,15 +94,16 @@
// word32 ht,
// word32 wd )
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_luma_vert_w16inp_av8
.type ihevc_inter_pred_luma_vert_w16inp_av8, %function
ENTRY ihevc_inter_pred_luma_vert_w16inp_av8
ihevc_inter_pred_luma_vert_w16inp_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -383,7 +384,6 @@ end_loops:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -62,13 +62,12 @@
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_luma_vert_w16out_av8
.type ihevc_inter_pred_luma_vert_w16out_av8, %function
ENTRY ihevc_inter_pred_luma_vert_w16out_av8
ihevc_inter_pred_luma_vert_w16out_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -379,7 +378,6 @@ end_loops_16out:
bne lbl355
ldp x19, x20,[sp], #16
EXIT_FUNC
ret
lbl355:
mov x5, #4
@ -473,7 +471,6 @@ end_inner_loop_wd_4_16out:
// ldmfd sp!, {x4-x12, x15} //reload the registers from sp
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -71,14 +71,16 @@
// x11 => ht
// x16 => wd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_inter_pred_luma_copy_av8
.type ihevc_inter_pred_luma_copy_av8, %function
ENTRY ihevc_inter_pred_luma_copy_av8
ihevc_inter_pred_luma_copy_av8:
// stmfd sp!, {x8-x16, lr} //stack stores the values of the arguments
stp x19,x20,[sp, #-16]!
mov x16,x6 //loads wd
@ -123,7 +125,6 @@ end_loops:
// MRS x20,PMCCFILTR_EL0
sub x0,x20,x19
ldp x19,x20,[sp],#16
EXIT_FUNC
ret
@ -158,7 +159,6 @@ end_inner_loop_wd_8:
// MRS x20,PMCCFILTR_EL0
sub x0,x20,x19
ldp x19,x20,[sp],#16
EXIT_FUNC
ret
core_loop_wd_16:
@ -192,7 +192,6 @@ end_inner_loop_wd_16:
// MRS x20,PMCCFILTR_EL0
sub x0,x20,x19
ldp x19,x20,[sp],#16
EXIT_FUNC
ret

View file

@ -72,15 +72,16 @@
// x7 => ht
// x12 => wd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_luma_copy_w16out_av8
.type ihevc_inter_pred_luma_copy_w16out_av8, %function
ENTRY ihevc_inter_pred_luma_copy_w16out_av8
ihevc_inter_pred_luma_copy_w16out_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -139,7 +140,6 @@ end_loops:
ldp x19, x20,[sp], #16
EXIT_FUNC
ret
@ -265,7 +265,6 @@ epilog_end:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -107,15 +107,16 @@
//x15 - #1
//x16 - src_ptx1
//x19 - loop_counter
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_inter_pred_luma_horz_w16out_av8
.type ihevc_inter_pred_luma_horz_w16out_av8, %function
ENTRY ihevc_inter_pred_luma_horz_w16out_av8
ihevc_inter_pred_luma_horz_w16out_av8:
// stmfd sp!, {x8-x16, x19} //stack stores the values of the arguments
push_v_regs
@ -304,7 +305,6 @@ height_residue_4:
bne lbl280
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret
lbl280:
@ -365,7 +365,6 @@ end_inner_loop_height_residue_4:
// ldmfd sp!,{x8-x16,pc} //reload the registers from sp
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret
outer_loop8_residual:
@ -477,7 +476,6 @@ end_inner_loop_8:
// ldmfd sp!,{x8-x16,pc} //reload the registers from sp
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret
@ -668,7 +666,6 @@ end_loops1:
// ldmfd sp!,{x8-x16,pc} //reload the registers from sp
ldp x19, x20,[sp], #16
pop_v_regs
EXIT_FUNC
ret

View file

@ -102,14 +102,16 @@
// r5 => ht
// r6 => wd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_inter_pred_luma_vert_w16inp_w16out_av8
.type ihevc_inter_pred_luma_vert_w16inp_w16out_av8, %function
ENTRY ihevc_inter_pred_luma_vert_w16inp_w16out_av8
ihevc_inter_pred_luma_vert_w16inp_w16out_av8:
//stmfd sp!, {r4-r12, r14} //stack stores the values of the arguments
@ -406,7 +408,6 @@ end_loops:
//ldmfd sp!,{r4-r12,r15} //reload the registers from sp
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_chroma_dc_av8, %function
ENTRY ihevc_intra_pred_chroma_dc_av8
ihevc_intra_pred_chroma_dc_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
push_v_regs
@ -292,7 +293,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -84,15 +84,16 @@
//x2 => *pu1_dst
//x3 => dst_strd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_intra_pred_chroma_horz_av8
.type ihevc_intra_pred_chroma_horz_av8, %function
ENTRY ihevc_intra_pred_chroma_horz_av8
ihevc_intra_pred_chroma_horz_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -188,7 +189,6 @@ core_loop_16:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
b endloop
@ -270,7 +270,6 @@ core_loop_8:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
b endloop
@ -319,7 +318,6 @@ core_loop_4:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
b endloop
@ -355,7 +353,6 @@ core_loop_4:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
endloop:

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_chroma_mode2_av8, %function
ENTRY ihevc_intra_pred_chroma_mode2_av8
ihevc_intra_pred_chroma_mode2_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
push_v_regs
@ -302,7 +303,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_chroma_mode_18_34_av8, %function
ENTRY ihevc_intra_pred_chroma_mode_18_34_av8
ihevc_intra_pred_chroma_mode_18_34_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -188,7 +189,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -81,8 +81,9 @@
// word32 nt,
// word32 mode)
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_intra_pred_chroma_mode_27_to_33_av8
@ -91,7 +92,7 @@
.type ihevc_intra_pred_chroma_mode_27_to_33_av8, %function
ENTRY ihevc_intra_pred_chroma_mode_27_to_33_av8
ihevc_intra_pred_chroma_mode_27_to_33_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -548,7 +549,6 @@ end_loops:
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
ldp d9,d10,[sp],#16
EXIT_FUNC
ret

View file

@ -86,8 +86,10 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -99,7 +101,7 @@
.type ihevc_intra_pred_chroma_mode_3_to_9_av8, %function
ENTRY ihevc_intra_pred_chroma_mode_3_to_9_av8
ihevc_intra_pred_chroma_mode_3_to_9_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -487,7 +489,6 @@ end_func:
ldp d8,d15,[sp],#16 // Loading d15 using { ldr d15,[sp]; add sp,sp,#8 } is giving bus error.
// d8 is used as dummy register and loaded along with d15 using ldp. d8 is not used in the function.
ldp d13,d14,[sp],#16
EXIT_FUNC
ret

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_intra_pred_chroma_planar_av8
@ -102,7 +103,7 @@
.type ihevc_intra_pred_chroma_planar_av8, %function
ENTRY ihevc_intra_pred_chroma_planar_av8
ihevc_intra_pred_chroma_planar_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -373,7 +374,6 @@ end_loop:
// d8 is used as dummy register and loaded along with d14 using ldp. d8 is not used in the function.
ldp d12,d13,[sp],#16
ldp d10,d11,[sp],#16
EXIT_FUNC
ret

View file

@ -87,15 +87,16 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_intra_pred_chroma_ver_av8
.type ihevc_intra_pred_chroma_ver_av8, %function
ENTRY ihevc_intra_pred_chroma_ver_av8
ihevc_intra_pred_chroma_ver_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
push_v_regs
@ -225,7 +226,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -88,8 +88,9 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_chroma_mode_11_to_17_av8, %function
ENTRY ihevc_intra_pred_chroma_mode_11_to_17_av8
ihevc_intra_pred_chroma_mode_11_to_17_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -616,7 +617,6 @@ end_func:
ldp x19, x20,[sp],#16
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
EXIT_FUNC
ret

View file

@ -88,8 +88,9 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_intra_pred_chroma_mode_19_to_25_av8
@ -99,7 +100,7 @@
.type ihevc_intra_pred_chroma_mode_19_to_25_av8, %function
ENTRY ihevc_intra_pred_chroma_mode_19_to_25_av8
ihevc_intra_pred_chroma_mode_19_to_25_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -570,7 +571,6 @@ end_loops:
ldp d8,d14,[sp],#16 // Loading d14 using { ldr d14,[sp]; add sp,sp,#8 } is giving bus error.
// d8 is used as dummy register and loaded along with d14 using ldp. d8 is not used in the function.
ldp d12,d13,[sp],#16
EXIT_FUNC
ret

View file

@ -88,8 +88,9 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_luma_mode_11_to_17_av8, %function
ENTRY ihevc_intra_pred_luma_mode_11_to_17_av8
ihevc_intra_pred_luma_mode_11_to_17_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -690,7 +691,6 @@ end_func:
ldp x19, x20,[sp],#16
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
EXIT_FUNC
ret

View file

@ -88,8 +88,9 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -100,7 +101,7 @@
.type ihevc_intra_pred_luma_mode_19_to_25_av8, %function
ENTRY ihevc_intra_pred_luma_mode_19_to_25_av8
ihevc_intra_pred_luma_mode_19_to_25_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -660,7 +661,6 @@ end_loops:
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
ldp d9,d10,[sp],#16
EXIT_FUNC
ret

View file

@ -92,15 +92,16 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_intra_pred_luma_dc_av8
.type ihevc_intra_pred_luma_dc_av8, %function
ENTRY ihevc_intra_pred_luma_dc_av8
ihevc_intra_pred_luma_dc_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -510,7 +511,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -84,8 +84,9 @@
//x2 => *pu1_dst
//x3 => dst_strd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -93,7 +94,7 @@
.type ihevc_intra_pred_luma_horz_av8, %function
ENTRY ihevc_intra_pred_luma_horz_av8
ihevc_intra_pred_luma_horz_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -188,7 +189,6 @@ core_loop_32:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
b end_func
@ -269,7 +269,6 @@ core_loop_16:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
b end_func
@ -316,7 +315,6 @@ core_loop_8:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
b end_func
@ -352,7 +350,6 @@ core_loop_4:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret
end_func:

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_luma_mode2_av8, %function
ENTRY ihevc_intra_pred_luma_mode2_av8
ihevc_intra_pred_luma_mode2_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -269,7 +270,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -101,7 +102,7 @@
.type ihevc_intra_pred_luma_mode_18_34_av8, %function
ENTRY ihevc_intra_pred_luma_mode_18_34_av8
ihevc_intra_pred_luma_mode_18_34_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
push_v_regs
@ -277,7 +278,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -85,8 +85,9 @@
//x2 => *pu1_dst
//x3 => dst_strd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -96,7 +97,7 @@
.type ihevc_intra_pred_luma_mode_27_to_33_av8, %function
ENTRY ihevc_intra_pred_luma_mode_27_to_33_av8
ihevc_intra_pred_luma_mode_27_to_33_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -553,7 +554,6 @@ end_loops:
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
ldp d9,d10,[sp],#16
EXIT_FUNC
ret

View file

@ -88,8 +88,9 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -102,7 +103,7 @@
.type ihevc_intra_pred_luma_mode_3_to_9_av8, %function
ENTRY ihevc_intra_pred_luma_mode_3_to_9_av8
ihevc_intra_pred_luma_mode_3_to_9_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -562,7 +563,6 @@ end_func:
ldp x19, x20,[sp],#16
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
EXIT_FUNC
ret

View file

@ -92,8 +92,9 @@
// mode
// pi1_coeff
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -103,7 +104,7 @@
.type ihevc_intra_pred_luma_planar_av8, %function
ENTRY ihevc_intra_pred_luma_planar_av8
ihevc_intra_pred_luma_planar_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -557,7 +558,6 @@ end_loop:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -88,8 +88,9 @@
// nt
// mode
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -97,7 +98,7 @@
.type ihevc_intra_pred_luma_ver_av8, %function
ENTRY ihevc_intra_pred_luma_ver_av8
ihevc_intra_pred_luma_ver_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -423,7 +424,6 @@ end_func:
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -105,8 +105,10 @@
// x12
// x11
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -121,7 +123,7 @@
.type ihevc_itrans_recon_16x16_av8, %function
ENTRY ihevc_itrans_recon_16x16_av8
ihevc_itrans_recon_16x16_av8:
ldr w11, [sp]
// stmfd sp!,{x4-x12,x14}
@ -1224,7 +1226,6 @@ skip_last8rows_stage2_kernel2:
// ldmfd sp!,{x4-x12,pc}
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -124,8 +124,9 @@
//d5[2]= 43 d7[2]=9
//d5[3]= 38 d7[3]=4
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -142,7 +143,7 @@
.type ihevc_itrans_recon_32x32_av8, %function
ENTRY ihevc_itrans_recon_32x32_av8
ihevc_itrans_recon_32x32_av8:
ldr w11, [sp]
@ -3041,7 +3042,6 @@ prediction_buffer:
// ldmfd sp!,{x0-x12,pc}
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -100,8 +100,10 @@
// x6 => dst_strd
// x7 => zero_cols
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.set shift_stage1_idct , 7
.set shift_stage2_idct , 12
@ -114,7 +116,7 @@
.type ihevc_itrans_recon_4x4_av8, %function
ENTRY ihevc_itrans_recon_4x4_av8
ihevc_itrans_recon_4x4_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -227,7 +229,6 @@ ENTRY ihevc_itrans_recon_4x4_av8
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -103,8 +103,10 @@
// x6 => dst_strd
// x7 => zero_cols
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.set shift_stage1_idct , 7
.set shift_stage2_idct , 12
@ -113,7 +115,7 @@
.type ihevc_itrans_recon_4x4_ttype1_av8, %function
ENTRY ihevc_itrans_recon_4x4_ttype1_av8
ihevc_itrans_recon_4x4_ttype1_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -232,7 +234,6 @@ ENTRY ihevc_itrans_recon_4x4_ttype1_av8
// ldmfd sp!,{x4-x12,x15} //reload the registers from sp
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -105,8 +105,10 @@
// zero_cols
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
@ -121,7 +123,7 @@
.type ihevc_itrans_recon_8x8_av8, %function
ENTRY ihevc_itrans_recon_8x8_av8
ihevc_itrans_recon_8x8_av8:
////register usage.extern - loading and until idct of columns
//// cosine constants - d0
//// sine constants - d1
@ -1028,7 +1030,6 @@ pred_buff_addition:
// ldmfd sp!,{x4-x12,pc}
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -69,14 +69,14 @@
// x1 => *pu1_src
// x2 => num_bytes
.include "ihevc_neon_macros.s"
.text
.p2align 2
.global ihevc_memcpy_mul_8_av8
.type ihevc_memcpy_mul_8_av8, %function
ENTRY ihevc_memcpy_mul_8_av8
ihevc_memcpy_mul_8_av8:
LOOP_NEON_MEMCPY_MUL_8:
// Memcpy 8 bytes
@ -85,7 +85,6 @@ LOOP_NEON_MEMCPY_MUL_8:
SUBS x2,x2,#8
BNE LOOP_NEON_MEMCPY_MUL_8
EXIT_FUNC
ret
@ -105,7 +104,7 @@ LOOP_NEON_MEMCPY_MUL_8:
.global ihevc_memcpy_av8
.type ihevc_memcpy_av8, %function
ENTRY ihevc_memcpy_av8
ihevc_memcpy_av8:
SUBS x2,x2,#8
BLT ARM_MEMCPY
LOOP_NEON_MEMCPY:
@ -127,7 +126,6 @@ LOOP_ARM_MEMCPY:
SUBS x2,x2,#1
BNE LOOP_ARM_MEMCPY
MEMCPY_RETURN:
EXIT_FUNC
ret
@ -142,13 +140,14 @@ MEMCPY_RETURN:
// x2 => num_bytes
.text
.p2align 2
.global ihevc_memset_mul_8_av8
.type ihevc_memset_mul_8_av8, %function
ENTRY ihevc_memset_mul_8_av8
ihevc_memset_mul_8_av8:
// Assumptions: numbytes is either 8, 16 or 32
dup v0.8b,w1
@ -159,7 +158,6 @@ LOOP_MEMSET_MUL_8:
SUBS x2,x2,#8
BNE LOOP_MEMSET_MUL_8
EXIT_FUNC
ret
@ -178,7 +176,7 @@ LOOP_MEMSET_MUL_8:
.global ihevc_memset_av8
.type ihevc_memset_av8, %function
ENTRY ihevc_memset_av8
ihevc_memset_av8:
SUBS x2,x2,#8
BLT ARM_MEMSET
dup v0.8b,w1
@ -200,7 +198,6 @@ LOOP_ARM_MEMSET:
BNE LOOP_ARM_MEMSET
MEMSET_RETURN:
EXIT_FUNC
ret
@ -215,13 +212,14 @@ MEMSET_RETURN:
// x2 => num_words
.text
.p2align 2
.global ihevc_memset_16bit_mul_8_av8
.type ihevc_memset_16bit_mul_8_av8, %function
ENTRY ihevc_memset_16bit_mul_8_av8
ihevc_memset_16bit_mul_8_av8:
// Assumptions: num_words is either 8, 16 or 32
@ -233,7 +231,6 @@ LOOP_MEMSET_16BIT_MUL_8:
SUBS x2,x2,#8
BNE LOOP_MEMSET_16BIT_MUL_8
EXIT_FUNC
ret
@ -252,7 +249,7 @@ LOOP_MEMSET_16BIT_MUL_8:
.global ihevc_memset_16bit_av8
.type ihevc_memset_16bit_av8, %function
ENTRY ihevc_memset_16bit_av8
ihevc_memset_16bit_av8:
SUBS x2,x2,#8
BLT ARM_MEMSET_16BIT
dup v0.8h,w1
@ -274,7 +271,6 @@ LOOP_ARM_MEMSET_16BIT:
BNE LOOP_ARM_MEMSET_16BIT
MEMSET_16BIT_RETURN:
EXIT_FUNC
ret

View file

@ -47,53 +47,3 @@
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
.endm
// --- Internal Security Dispatchers ---
// These expand to real instructions only if the compiler flags are present.
// BTI_ENABLE: expands to a single `bti c` instruction when
// __ARM_FEATURE_BTI_DEFAULT is defined, and to nothing otherwise.
// Takes no arguments.
// NOTE(review): the guard is a C-preprocessor `#if`. This file is pulled into
// the other sources with the assembler directive `.include`, so confirm that
// the build really runs the preprocessor over this text; if it does not, the
// guard lines are not evaluated as conditionals.
.macro BTI_ENABLE
#if defined(__ARM_FEATURE_BTI_DEFAULT)
bti c
#endif
.endm
// PAC_ENTRY: expands to `paciasp` (sign the link register with key A, using
// SP as the modifier) when __ARM_FEATURE_PAC_DEFAULT is defined, and to
// nothing otherwise. Takes no arguments. Paired with PAC_EXIT.
.macro PAC_ENTRY
#if defined(__ARM_FEATURE_PAC_DEFAULT)
paciasp
#endif
.endm
// PAC_EXIT: expands to `autiasp` (authenticate the link register with key A,
// using SP as the modifier) when __ARM_FEATURE_PAC_DEFAULT is defined, and to
// nothing otherwise. Takes no arguments.
// SP must hold the same value it had when PAC_ENTRY ran, otherwise the
// authentication does not match the signature.
.macro PAC_EXIT
#if defined(__ARM_FEATURE_PAC_DEFAULT)
autiasp
#endif
.endm
// --- Main ENTRY and EXIT_FUNC Macros ---
// ENTRY name: function prologue helper.
//   name - symbol to define as the function entry label.
// Aligns the location counter to 4 bytes, defines the label, then emits the
// optional BTI and PAC entry instructions as the first instructions of the
// function. The label is defined by this macro, so the call site must not
// define the same label again.
// This macro does not emit .globl or .type for the symbol.
.macro ENTRY name
.p2align 2
\name:
// bti c goes first so it is the instruction at the function entry address
BTI_ENABLE
PAC_ENTRY
.endm
// EXIT_FUNC: function epilogue helper; expands to PAC_EXIT only.
// It does not emit `ret` and does not restore any registers. Place it after
// the stack pointer has been restored and immediately before `ret`.
.macro EXIT_FUNC
PAC_EXIT
.endm
// --- GNU Property Note ---
// Signals BTI and PAC support to the Android linker.
// Emits one ELF note into .note.gnu.property describing AArch64 feature bits.
// Layout: name size, descriptor size, note type, owner string, then a single
// property record (type, data size, value, padding).
// NOTE(review): this block is gated only on __linux__ and __aarch64__, while
// the instructions themselves are gated on __ARM_FEATURE_BTI_DEFAULT and
// __ARM_FEATURE_PAC_DEFAULT. Confirm that advertising both bits is intended
// when those feature macros are not defined.
#if defined(__linux__) && defined(__aarch64__)
.pushsection .note.gnu.property, "a" // Switch to the note section (allocatable)
.p2align 3
.word 4 // Name size: "GNU" plus terminating NUL
.word 16 // Descriptor size: the four words after the owner string
.word 5 // NT_GNU_PROPERTY_TYPE_0
.asciz "GNU" // Owner
.word 0xc0000000 // Property type: GNU_PROPERTY_AARCH64_FEATURE_1_AND
.word 4 // Property data size
.word 3 // Value: BTI (Bit 0) | PAC (Bit 1)
.word 0 // Padding to 8-byte alignment
.popsection // Switch back to previous section
#endif

View file

@ -85,14 +85,14 @@
// x2 => ht
// x3 => pad_size
.include "ihevc_neon_macros.s"
.text
.align 4
.globl ihevc_pad_left_luma_av8
.type ihevc_pad_left_luma_av8, %function
ENTRY ihevc_pad_left_luma_av8
ihevc_pad_left_luma_av8:
loop_start_luma_left:
// pad size is assumed to be pad_left = 80
@ -148,7 +148,6 @@ loop_start_luma_left:
bne loop_start_luma_left
EXIT_FUNC
ret
@ -210,7 +209,7 @@ loop_start_luma_left:
.type ihevc_pad_left_chroma_av8, %function
ENTRY ihevc_pad_left_chroma_av8
ihevc_pad_left_chroma_av8:
loop_start_chroma_left:
@ -267,7 +266,6 @@ loop_start_chroma_left:
bne loop_start_chroma_left
EXIT_FUNC
ret
@ -339,7 +337,7 @@ loop_start_chroma_left:
.type ihevc_pad_right_luma_av8, %function
ENTRY ihevc_pad_right_luma_av8
ihevc_pad_right_luma_av8:
loop_start_luma_right:
@ -397,7 +395,6 @@ loop_start_luma_right:
bne loop_start_luma_right
EXIT_FUNC
ret
@ -458,7 +455,7 @@ loop_start_luma_right:
.type ihevc_pad_right_chroma_av8, %function
ENTRY ihevc_pad_right_chroma_av8
ihevc_pad_right_chroma_av8:
loop_start_chroma_right:
@ -515,7 +512,6 @@ loop_start_chroma_right:
bne loop_start_chroma_right
EXIT_FUNC
ret

View file

@ -61,13 +61,14 @@
//x9 => wd 60
//x10=> ht 64
.include "ihevc_neon_macros.s"
.text
.p2align 2
.include "ihevc_neon_macros.s"
.globl gu1_table_band_idx
.globl ihevc_sao_band_offset_chroma_av8
ENTRY ihevc_sao_band_offset_chroma_av8
ihevc_sao_band_offset_chroma_av8:
mov x8,#0
mov x9,#0
mov x10,#0
@ -423,7 +424,6 @@ END_LOOP:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
pop_v_regs
EXIT_FUNC
ret

View file

@ -65,11 +65,12 @@
.include "ihevc_neon_macros.s"
.text
.p2align 2
.globl gu1_table_band_idx
.globl ihevc_sao_band_offset_luma_av8
ENTRY ihevc_sao_band_offset_luma_av8
ihevc_sao_band_offset_luma_av8:
// STMFD sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -243,7 +244,6 @@ HEIGHT_LOOP:
ldp d8,d15,[sp],#16 // Loading d15 using { ldr d15,[sp]; add sp,sp,#8 } is giving bus error.
// d8 is used as dummy register and loaded along with d15 using ldp. d8 is not used in the function.
ldp d13,d14,[sp],#16
EXIT_FUNC
ret

View file

@ -59,14 +59,15 @@
//x9 => wd
//x10=> ht
.text
.p2align 2
.include "ihevc_neon_macros.s"
.text
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class0_av8
ENTRY ihevc_sao_edge_offset_class0_av8
ihevc_sao_edge_offset_class0_av8:
// STMFD sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -337,7 +338,6 @@ END_LOOPS:
// LDMFD sp!,{x4-x12,x15} //Reload the registers from SP
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -60,13 +60,14 @@
//x9 => wd
//x10=> ht
.include "ihevc_neon_macros.s"
.text
.p2align 2
.include "ihevc_neon_macros.s"
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class0_chroma_av8
ENTRY ihevc_sao_edge_offset_class0_chroma_av8
ihevc_sao_edge_offset_class0_chroma_av8:
ldr x8,[sp,#0]
ldr x9,[sp,#8]
@ -476,7 +477,6 @@ END_LOOPS:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -58,14 +58,15 @@
//x7 => wd
//x8 => ht
.text
.p2align 2
.include "ihevc_neon_macros.s"
.text
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class1_av8
ENTRY ihevc_sao_edge_offset_class1_av8
ihevc_sao_edge_offset_class1_av8:
// STMFD sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -354,7 +355,6 @@ END_LOOPS:
// LDMFD sp!,{x4-x12,x15} //Reload the registers from SP
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -60,13 +60,14 @@
//x8 => wd
//x9 => ht
.include "ihevc_neon_macros.s"
.text
.p2align 2
.include "ihevc_neon_macros.s"
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class1_chroma_av8
ENTRY ihevc_sao_edge_offset_class1_chroma_av8
ihevc_sao_edge_offset_class1_chroma_av8:
ldr x8,[sp,#0]
@ -457,7 +458,6 @@ END_LOOPS:
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -58,14 +58,15 @@
//x7 => wd
//x8=> ht
.text
.p2align 2
.include "ihevc_neon_macros.s"
.text
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class2_av8
ENTRY ihevc_sao_edge_offset_class2_av8
ihevc_sao_edge_offset_class2_av8:
// STMFD sp!,{x4-x12,x14} //stack stores the values of the arguments
@ -845,7 +846,6 @@ END_LOOPS:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -60,13 +60,14 @@
//x7 => wd
//x8=> ht
.include "ihevc_neon_macros.s"
.text
.p2align 2
.include "ihevc_neon_macros.s"
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class2_chroma_av8
ENTRY ihevc_sao_edge_offset_class2_chroma_av8
ihevc_sao_edge_offset_class2_chroma_av8:
// STMFD sp!,{x4-x12,x14} //stack stores the values of the arguments
@ -1131,7 +1132,6 @@ END_LOOPS:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -58,14 +58,15 @@
//x7 => wd
//x8=> ht
.text
.p2align 2
.include "ihevc_neon_macros.s"
.text
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class3_av8
ENTRY ihevc_sao_edge_offset_class3_av8
ihevc_sao_edge_offset_class3_av8:
// STMFD sp!,{x4-x12,x14} //stack stores the values of the arguments
@ -888,7 +889,6 @@ END_LOOPS:
ldp x23, x24,[sp], #16
ldp x21, x22,[sp], #16
ldp x19, x20,[sp], #16
EXIT_FUNC
ret

View file

@ -60,12 +60,13 @@
//x7 => wd
//x8=> ht
.include "ihevc_neon_macros.s"
.text
.p2align 2
.include "ihevc_neon_macros.s"
.globl gi1_table_edge_idx
.globl ihevc_sao_edge_offset_class3_chroma_av8
ENTRY ihevc_sao_edge_offset_class3_chroma_av8
ihevc_sao_edge_offset_class3_chroma_av8:
// STMFD sp!,{x4-x12,x14} //stack stores the values of the arguments
@ -1166,7 +1167,6 @@ END_LOOPS:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -134,14 +134,16 @@
// x14 => ht
// x7 => wd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_weighted_pred_bi_av8
.type ihevc_weighted_pred_bi_av8, %function
ENTRY ihevc_weighted_pred_bi_av8
ihevc_weighted_pred_bi_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -305,7 +307,6 @@ end_loops:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -107,15 +107,16 @@
// x7 => lvl_shift2
// x8 => ht
// x9 => wd
.text
.align 4
.include "ihevc_neon_macros.s"
.text
.globl ihevc_weighted_pred_bi_default_av8
.type ihevc_weighted_pred_bi_default_av8, %function
ENTRY ihevc_weighted_pred_bi_default_av8
ihevc_weighted_pred_bi_default_av8:
ldr w8,[sp,#0]
ldr w9,[sp,#8]
@ -533,7 +534,6 @@ end_loops:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -112,14 +112,16 @@
// x8 => ht
// x9 => wd
.include "ihevc_neon_macros.s"
.text
.align 4
.include "ihevc_neon_macros.s"
.globl ihevc_weighted_pred_uni_av8
.type ihevc_weighted_pred_uni_av8, %function
ENTRY ihevc_weighted_pred_uni_av8
ihevc_weighted_pred_uni_av8:
// stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments
@ -238,7 +240,6 @@ end_loops:
ldp x21, x22,[sp],#16
ldp x19, x20,[sp],#16
EXIT_FUNC
ret

View file

@ -18,7 +18,6 @@ list(
"${HEVC_ROOT}/common/ihevc_trans_tables.c"
"${HEVC_ROOT}/common/ihevc_recon.c"
"${HEVC_ROOT}/common/ihevc_itrans.c"
"${HEVC_ROOT}/common/ihevc_itrans_res.c"
"${HEVC_ROOT}/common/ihevc_itrans_recon.c"
"${HEVC_ROOT}/common/ihevc_iquant_recon.c"
"${HEVC_ROOT}/common/ihevc_iquant_itrans_recon.c"
@ -30,7 +29,6 @@ list(
"${HEVC_ROOT}/common/ihevc_chroma_iquant_itrans_recon.c"
"${HEVC_ROOT}/common/ihevc_chroma_recon.c"
"${HEVC_ROOT}/common/ihevc_chroma_itrans_recon_16x16.c"
"${HEVC_ROOT}/common/ihevc_chroma_itrans_recon_32x32.c"
"${HEVC_ROOT}/common/ihevc_chroma_itrans_recon_8x8.c"
"${HEVC_ROOT}/common/ihevc_buf_mgr.c"
"${HEVC_ROOT}/common/ihevc_disp_mgr.c"
@ -64,7 +62,7 @@ list(
include_directories(${HEVC_ROOT}/common)
# arm/x86 sources
if("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "arm64")
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
list(
APPEND
LIBHEVC_COMMON_ASMS
@ -137,7 +135,7 @@ if("${SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${SYSTEM_PROCESSOR}" STREQUAL "a
"${HEVC_ROOT}/common/arm64/ihevc_weighted_pred_uni.s")
include_directories(${HEVC_ROOT}/common/arm64 ${HEVC_ROOT}/common/arm)
elseif("${SYSTEM_PROCESSOR}" STREQUAL "aarch32")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32")
list(
APPEND
LIBHEVC_COMMON_ASMS

File diff suppressed because it is too large Load diff

View file

@ -92,13 +92,9 @@ typedef enum
IHEVC_CAB_COEFFY_PREFIX = IHEVC_CAB_COEFFX_PREFIX + 18,
IHEVC_CAB_CODED_SUBLK_IDX = IHEVC_CAB_COEFFY_PREFIX + 18,
IHEVC_CAB_COEFF_FLAG = IHEVC_CAB_CODED_SUBLK_IDX + 4,
IHEVC_CAB_COEFABS_GRTR1_FLAG = IHEVC_CAB_COEFF_FLAG + 44,
IHEVC_CAB_COEFABS_GRTR1_FLAG = IHEVC_CAB_COEFF_FLAG + 42,
IHEVC_CAB_COEFABS_GRTR2_FLAG = IHEVC_CAB_COEFABS_GRTR1_FLAG + 24,
IHEVC_CAB_CCP_LOG2_RES_ABS = IHEVC_CAB_COEFABS_GRTR2_FLAG + 6,
IHEVC_CAB_CCP_RES_SIGN_FLAG = IHEVC_CAB_CCP_LOG2_RES_ABS + 8,
IHEVC_CAB_EXPLICIT_RDPCM_FLAG = IHEVC_CAB_CCP_RES_SIGN_FLAG + 2,
IHEVC_CAB_EXPLICIT_RDPCM_DIR = IHEVC_CAB_EXPLICIT_RDPCM_FLAG + 2,
IHEVC_CAB_CTXT_END = IHEVC_CAB_EXPLICIT_RDPCM_DIR + 2,
IHEVC_CAB_CTXT_END = IHEVC_CAB_COEFABS_GRTR2_FLAG + 6
}IHEVC_CABAC_CTXT_OFFSETS;

View file

@ -126,14 +126,7 @@ typedef void ihevc_intra_pred_chroma_ref_substitution_ft(UWORD8 *pu1_top_left,
WORD32 nt,
WORD32 nbr_flags,
UWORD8 *pu1_dst,
WORD32 dst_strd,
WORD32 chroma_format_idc);
typedef void ihevc_intra_pred_chroma_ref_filtering_ft(UWORD8 *pu1_src,
WORD32 nt,
UWORD8 *pu1_dst,
WORD32 mode,
WORD32 intra_smoothing_flags);
WORD32 dst_strd);
typedef void ihevc_hbd_intra_pred_chroma_planar_ft(
UWORD16 *pu2_ref,
@ -247,7 +240,6 @@ ihevc_intra_pred_chroma_mode_11_to_17_ft ihevc_intra_pred_chroma_mode_11_to_17;
ihevc_intra_pred_chroma_mode_19_to_25_ft ihevc_intra_pred_chroma_mode_19_to_25;
ihevc_intra_pred_chroma_mode_27_to_33_ft ihevc_intra_pred_chroma_mode_27_to_33;
ihevc_intra_pred_chroma_ref_substitution_ft ihevc_intra_pred_chroma_ref_substitution;
ihevc_intra_pred_chroma_ref_filtering_ft ihevc_intra_pred_chroma_ref_filtering;
ihevc_hbd_intra_pred_chroma_planar_ft ihevc_hbd_intra_pred_chroma_planar;
ihevc_hbd_intra_pred_chroma_dc_ft ihevc_hbd_intra_pred_chroma_dc;

View file

@ -64,7 +64,6 @@
#include "ihevc_typedefs.h"
#include "ihevc_macros.h"
#include "ihevc_defs.h"
#include "ihevc_func_selector.h"
#include "ihevc_platform_macros.h"
#include "ihevc_intra_pred.h"
@ -130,8 +129,7 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
WORD32 nt,
WORD32 nbr_flags,
UWORD8 *pu1_dst,
WORD32 dst_strd,
WORD32 chroma_format_idc)
WORD32 dst_strd)
{
UWORD8 pu1_ref_u, pu1_ref_v;
WORD32 dc_val, i, j;
@ -208,7 +206,7 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
// U-V interleaved Top-top right samples
}
if(nt == 4 || (nt == 8 && chroma_format_idc == CHROMA_FMT_IDC_YUV444))
if(nt == 4)
{
/* 1 bit extraction for all the neighboring blocks */
tp_left = (nbr_flags & 0x10000) >> 16;
@ -276,9 +274,8 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
}
}
else if(nt == 8 || (nt == 16 && chroma_format_idc == CHROMA_FMT_IDC_YUV444))
else if(nt == 8)
{
WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1;
WORD32 nbr_flags_temp = 0;
nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
+ ((nbr_flags & 0x300) >> 4)
@ -288,16 +285,16 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
/* compute trailing zeros based on nbr_flag for substitution process of below left see section .*/
/* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
{
nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * (4 * sub_sample); /* for bottom left and left */
if(nbr_id_from_bl == 32 * sub_sample)
nbr_id_from_bl = 16 * sub_sample;
if(nbr_id_from_bl == 16 * sub_sample)
nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */
if(nbr_id_from_bl == 32)
nbr_id_from_bl = 16;
if(nbr_id_from_bl == 16)
{
/* for top left : 1 pel per nbr bit */
if(!((nbr_flags_temp >> 8) & 0x1))
{
nbr_id_from_bl++;
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4 * sub_sample; /* top and top right; 8 pels per nbr bit */
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right; 8 pels per nbr bit */
}
}
@ -316,14 +313,14 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
}
/* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
while(nbr_id_from_bl < ((T8C_4NT * sub_sample)+1))
while(nbr_id_from_bl < ((T8C_4NT)+1))
{
/* To Obtain the next unavailable idx flag after reverse neighbor substitution */
/* Divide by 8 to obtain the original index */
frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/
frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
/* The Top-left flag is at the last bit location of nbr_flags*/
if(nbr_id_from_bl == (T8C_4NT * sub_sample / 2))
if(nbr_id_from_bl == (T8C_4NT / 2))
{
get_bits = GET_BIT(nbr_flags_temp, 8);
@ -342,23 +339,22 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
UWORD16 *pu2_dst;
/* 8 pel substitution (other than TL) */
pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample);
ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
}
}
nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT * sub_sample / 2)) ? 1 : 4 * sub_sample;
nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4;
}
}
else if(nt == 16 || (nt == 32 && chroma_format_idc == CHROMA_FMT_IDC_YUV444))
else if(nt == 16)
{
WORD32 sub_sample = chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 2 : 1;
/* compute trailing zeros based on nbr_flags for substitution process of below left see section .*/
/* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */
{
nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4 * sub_sample; /* for bottom left and left */
nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */
if(nbr_id_from_bl == 32 * sub_sample)
if(nbr_id_from_bl == 32)
{
/* for top left : 1 pel per nbr bit */
if(!((nbr_flags >> 16) & 0x1))
@ -366,7 +362,7 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
/* top left not available */
nbr_id_from_bl++;
/* top and top right; 4 pels per nbr bit */
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4 * sub_sample;
nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4;
}
}
/* Reverse Substitution Process*/
@ -384,14 +380,14 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
}
/* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
while(nbr_id_from_bl < ((T16C_4NT * sub_sample)+1))
while(nbr_id_from_bl < ((T16C_4NT)+1))
{
/* To Obtain the next unavailable idx flag after reverse neighbor substitution */
/* Divide by 4 to obtain the original index */
frwd_nbr_flag = (nbr_id_from_bl >> (chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 3 : 2)); /*+ (nbr_id_from_bl & 0x1);*/
frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
/* The Top-left flag is at the last bit location of nbr_flags*/
if(nbr_id_from_bl == (T16C_4NT * sub_sample / 2))
if(nbr_id_from_bl == (T16C_4NT / 2))
{
get_bits = GET_BIT(nbr_flags, 16);
/* only pel substitution for TL */
@ -409,110 +405,17 @@ void ihevc_intra_pred_chroma_ref_substitution(UWORD8 *pu1_top_left,
UWORD16 *pu2_dst;
/* 4 pel substitution (other than TL) */
pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4 * sub_sample);
ihevc_memset_16bit((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
}
}
nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT * sub_sample / 2)) ? 1 : 4 * sub_sample;
nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4;
}
}
}
}
/**
*******************************************************************************
*
* @brief
*  Reference sample smoothing for interleaved chroma neighbours (4:4:4)
*
* @par Description:
*  Either smooths the 4 * nt + 1 interleaved chroma reference pairs with a
*  [1 2 1] / 4 filter, or passes them through unchanged when smoothing is
*  disabled or not selected for this mode and block size. Refer to section
*  8.4.4.2.3 in the standard
*
* @param[in] pu1_src
*  UWORD8 pointer to the source; 2 * (4 * nt + 1) bytes are read
*
* @param[in] nt
*  integer Transform Block size
*
* @param[out] pu1_dst
*  UWORD8 pointer to the destination; may be the same pointer as pu1_src
*
* @param[in] mode
*  integer intraprediction mode
*
* @param[in] intra_smoothing_flag
*  bit 3 set disables smoothing unconditionally; bit 0 carries the strong
*  intra smoothing enable flag, which this function does not use
*
* @returns
*
* @remarks
*  None
*
*******************************************************************************
*/
void ihevc_intra_pred_chroma_ref_filtering(UWORD8 *pu1_src,
                                           WORD32 nt,
                                           UWORD8 *pu1_dst,
                                           WORD32 mode,
                                           WORD32 intra_smoothing_flag)
{
    /* Total number of interleaved bytes: two per reference position */
    const WORD32 num_bytes = 2 * ((4 * nt) + 1);
    /* Byte offset of the final pair, which is never filtered */
    const WORD32 last_pair = num_bytes - 2;
    const WORD32 smoothing_disabled = (intra_smoothing_flag >> 3) & 0x1;
    WORD32 apply_filter = 0;
    WORD32 idx;
    UWORD8 au1_flt[((4 * MAX_CU_SIZE) + 1) * 2];

    /* The mode/size table is consulted only when smoothing is not disabled */
    if(!smoothing_disabled)
    {
        apply_filter = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2));
    }

    if(0 == apply_filter)
    {
        /* Pass-through; nothing to do when operating in place */
        if(pu1_src != pu1_dst)
        {
            for(idx = 0; idx < num_bytes; idx++)
            {
                pu1_dst[idx] = pu1_src[idx];
            }
        }
        return;
    }

    /* Extremities Untouched */
    au1_flt[0] = pu1_src[0];
    au1_flt[1] = pu1_src[1];
    au1_flt[last_pair] = pu1_src[last_pair];
    au1_flt[last_pair + 1] = pu1_src[last_pair + 1];

    /* Same-component neighbours are two bytes apart in the interleaved */
    /* layout, so one byte-wise loop filters both components            */
    for(idx = 2; idx < last_pair; idx++)
    {
        au1_flt[idx] = (pu1_src[idx - 2] + 2 * pu1_src[idx] + pu1_src[idx + 2] + 2) >> 2;
    }

    /* Filtered into a scratch buffer first so that in-place use is safe */
    for(idx = 0; idx < num_bytes; idx++)
    {
        pu1_dst[idx] = au1_flt[idx];
    }
}
/**
*******************************************************************************
*

View file

@ -94,15 +94,6 @@ typedef void ihevc_hbd_chroma_itrans_recon_16x16_ft(WORD16 *pi2_src,
WORD32 zero_cols,
WORD32 zero_rows,
UWORD8 bit_depth);
typedef void ihevc_chroma_itrans_recon_32x32_ft(WORD16 *pi2_src,
WORD16 *pi2_tmp,
UWORD8 *pu1_pred,
UWORD8 *pu1_dst,
WORD32 src_strd,
WORD32 pred_strd,
WORD32 dst_strd,
WORD32 zero_cols,
WORD32 zero_rows);
ihevc_chroma_itrans_recon_4x4_ft ihevc_chroma_itrans_recon_4x4;
ihevc_hbd_chroma_itrans_recon_4x4_ft ihevc_hbd_chroma_itrans_recon_4x4;
@ -110,7 +101,6 @@ ihevc_chroma_itrans_recon_8x8_ft ihevc_chroma_itrans_recon_8x8;
ihevc_hbd_chroma_itrans_recon_8x8_ft ihevc_hbd_chroma_itrans_recon_8x8;
ihevc_chroma_itrans_recon_16x16_ft ihevc_chroma_itrans_recon_16x16;
ihevc_hbd_chroma_itrans_recon_16x16_ft ihevc_hbd_chroma_itrans_recon_16x16;
ihevc_chroma_itrans_recon_32x32_ft ihevc_chroma_itrans_recon_32x32;
/* A9 Q Function Declarations */
ihevc_chroma_itrans_recon_4x4_ft ihevc_chroma_itrans_recon_4x4_a9q;

File diff suppressed because it is too large Load diff

View file

@ -306,157 +306,3 @@ void ihevc_chroma_recon_16x16(WORD16 *pi2_src,
}
}
/**
*******************************************************************************
*
* @brief
*  This function performs reconstruction for a 32x32 chroma input block
*
* @par Description:
*  Reconstructs one chroma component of a 32x32 block by adding the
*  residual to the prediction and clipping the sum to 8 bit. Prediction and
*  output samples are accessed at every second byte of a row
*
* @param[in] pi2_src
*  Input 32x32 residual
*
* @param[in] pu1_pred
*  Prediction 32x32 block
*
* @param[out] pu1_dst
*  Output 32x32 block
*
* @param[in] src_strd
*  Input stride
*
* @param[in] pred_strd
*  Prediction stride
*
* @param[in] dst_strd
*  Output Stride
*
* @param[in] zero_cols
*  Bit mask, one bit per column starting at bit 0; a set bit means the
*  column has no residual and the prediction is copied as is
*
* @returns Void
*
* @remarks
*  None
*
*******************************************************************************
*/
void ihevc_chroma_recon_32x32(WORD16 *pi2_src,
                              UWORD8 *pu1_pred,
                              UWORD8 *pu1_dst,
                              WORD32 src_strd,
                              WORD32 pred_strd,
                              WORD32 dst_strd,
                              WORD32 zero_cols)
{
    WORD32 col;

    /* Reconstruction, one column at a time */
    for(col = 0; col < TRANS_SIZE_32; col++)
    {
        WORD32 row;
        /* Residual is stored densely; pred/dst step two bytes per column */
        WORD16 *pi2_res_col = pi2_src + col;
        UWORD8 *pu1_pred_col = pu1_pred + (2 * col);
        UWORD8 *pu1_dst_col = pu1_dst + (2 * col);

        if((zero_cols & 1) == 1)
        {
            /* Column flagged as zero: output is the prediction */
            for(row = 0; row < TRANS_SIZE_32; row++)
            {
                pu1_dst_col[row * dst_strd] = pu1_pred_col[row * pred_strd];
            }
        }
        else
        {
            for(row = 0; row < TRANS_SIZE_32; row++)
            {
                pu1_dst_col[row * dst_strd] =
                    CLIP_U8(pi2_res_col[row * src_strd] + pu1_pred_col[row * pred_strd]);
            }
        }

        /* Bring the next column's flag down to bit 0 */
        zero_cols = zero_cols >> 1;
    }
}
/**
******************************************************************************
*
* @brief Constructs chroma recon with Cross Component Prediction (CCP)
*
* @par Description
*  Predicts the chroma residual from the reconstructed luma residual as per
*  HEVC Specification Section 8.6.6: the luma residual is multiplied by
*  alpha, shifted right by 3 and added to the chroma residual. The sum is
*  added to the prediction and clipped to 8 bit. Prediction and output
*  samples are accessed at every second byte of a row.
*
* @param[in] pi2_luma_res
*  pointer to the luma residual
*
* @param[in] pi2_chroma_res
*  pointer to the chroma residual
*
* @param[in] pu1_pred
*  prediction block
*
* @param[out] pu1_dst
*  destination block
*
* @param[in] alpha
*  scaling factor for the luma residual
*
* @param[in] trans_size
*  transform size (number of rows and of columns processed)
*
* @param[in] luma_res_stride
*  stride of the luma residual buffer
*
* @param[in] chroma_res_stride
*  stride of the chroma residual buffer
*
* @param[in] pred_stride
*  Prediction stride
*
* @param[in] dst_stride
*  Output Stride
*
* @returns Void
*
******************************************************************************
*/
void ihevc_chroma_recon_nxn_ccp(WORD16 *pi2_luma_res,
                                WORD16 *pi2_chroma_res,
                                UWORD8 *pu1_pred,
                                UWORD8 *pu1_dst,
                                WORD32 alpha,
                                WORD32 trans_size,
                                WORD32 luma_res_stride,
                                WORD32 chroma_res_stride,
                                WORD32 pred_stride,
                                WORD32 dst_stride)
{
    WORD32 row;

    for(row = 0; row < trans_size; row++)
    {
        WORD32 col;
        WORD16 *pi2_luma_row = pi2_luma_res + (row * luma_res_stride);
        WORD16 *pi2_chroma_row = pi2_chroma_res + (row * chroma_res_stride);
        UWORD8 *pu1_pred_row = pu1_pred + (row * pred_stride);
        UWORD8 *pu1_dst_row = pu1_dst + (row * dst_stride);

        for(col = 0; col < trans_size; col++)
        {
            /* Scaled luma residual that is folded into the chroma residual */
            WORD32 scaled_luma = (alpha * pi2_luma_row[col]) >> 3;

            /* Residuals are dense; pred/dst step two bytes per sample */
            pu1_dst_row[col * 2] =
                CLIP_U8(pu1_pred_row[col * 2] + (pi2_chroma_row[col] + scaled_luma));
        }
    }
}

View file

@ -88,13 +88,6 @@ typedef void ihevc_hbd_chroma_recon_16x16_ft(WORD16 *pi2_src,
WORD32 dst_strd,
WORD32 zero_cols,
UWORD8 bit_depth);
typedef void ihevc_chroma_recon_32x32_ft(WORD16 *pi2_src,
UWORD8 *pu1_pred,
UWORD8 *pu1_dst,
WORD32 src_strd,
WORD32 pred_strd,
WORD32 dst_strd,
WORD32 zero_cols);
ihevc_chroma_recon_4x4_ft ihevc_chroma_recon_4x4;
ihevc_hbd_chroma_recon_4x4_ft ihevc_hbd_chroma_recon_4x4;
@ -102,17 +95,5 @@ ihevc_chroma_recon_8x8_ft ihevc_chroma_recon_8x8;
ihevc_hbd_chroma_recon_8x8_ft ihevc_hbd_chroma_recon_8x8;
ihevc_chroma_recon_16x16_ft ihevc_chroma_recon_16x16;
ihevc_hbd_chroma_recon_16x16_ft ihevc_hbd_chroma_recon_16x16;
ihevc_chroma_recon_32x32_ft ihevc_chroma_recon_32x32;
void ihevc_chroma_recon_nxn_ccp(WORD16 *pi2_luma_res,
WORD16 *pi2_chroma_res,
UWORD8 *pu1_pred,
UWORD8 *pu1_dst,
WORD32 alpha,
WORD32 trans_size,
WORD32 luma_res_stride,
WORD32 chroma_res_stride,
WORD32 pred_stride,
WORD32 dst_stride);
#endif /*_IHEVC_CHROMA_RECON_H_*/

View file

@ -40,10 +40,7 @@
/*****************************************************************************/
enum
{
IHEVC_PROFILE_UNKNOWN = -1,
IHEVC_PROFILE_MAIN = 0,
IHEVC_PROFILE_MAIN_STILL = 1,
IHEVC_PROFILE_MAIN_REXT = 2,
};
enum

View file

@ -46,9 +46,6 @@
#define REF_WIDTH 1280
#define REF_HEIGHT 720
extern WORD8 gai1_ihevc_luma_filter[4][NTAPS_LUMA];
extern WORD8 gai1_ihevc_chroma_filter[8][NTAPS_LUMA];
/*****************************************************************************/
/* Function Declarations */
/*****************************************************************************/

View file

@ -61,26 +61,6 @@
#include "ihevc_func_selector.h"
#include "ihevc_inter_pred.h"
/* Luma interpolation filter coefficients: 4 rows of 8 taps each.       */
/* Every row sums to 64; row 0 has a single 64 at tap 3 and zeros       */
/* elsewhere.                                                           */
/* NOTE(review): presumably one row per quarter-sample fractional       */
/* position - confirm against the callers                               */
WORD8 gai1_ihevc_luma_filter[4][NTAPS_LUMA] =
{
{ 0, 0, 0, 64, 0, 0, 0, 0 },
{ -1, 4, -10, 58, 17, -5, 1, 0 },
{ -1, 4, -11, 40, 40, -11, 4, -1 },
{ 0, 1, -5, 17, 58, -10, 4, -1 } };
/* Chroma interpolation filter coefficients: 8 rows, each row sums to 64. */
/* Rows are declared NTAPS_LUMA wide, the trailing four entries are zero. */
/* The filter uses only the first four elements in each array */
WORD8 gai1_ihevc_chroma_filter[8][NTAPS_LUMA] =
{
{ 0, 64, 0, 0, 0, 0, 0, 0 },
{ -2, 58, 10, -2, 0, 0, 0, 0 },
{ -4, 54, 16, -2, 0, 0, 0, 0 },
{ -6, 46, 28, -4, 0, 0, 0, 0 },
{ -4, 36, 36, -4, 0, 0, 0, 0 },
{ -4, 28, 46, -6, 0, 0, 0, 0 },
{ -2, 16, 54, -4, 0, 0, 0, 0 },
{ -2, 10, 58, -2, 0, 0, 0, 0 } };
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/

View file

@ -159,7 +159,7 @@ typedef void ihevc_intra_pred_ref_filtering_ft(UWORD8 *pu1_src,
WORD32 nt,
UWORD8 *pu1_dst,
WORD32 mode,
WORD32 intra_smoothing_flags);
WORD32 strong_intra_smoothing_enable_flag);
typedef void ihevc_hbd_intra_pred_luma_planar_ft(
UWORD16 *pu2_ref,

View file

@ -627,11 +627,6 @@ void ihevc_intra_pred_luma_ref_substitution(UWORD8 *pu1_top_left,
* @param[in] mode
* integer intraprediction mode
*
* @param[in] intra_smoothing_flags
* integer bit 3 indicates if intra smoothing is enabled/disabled
* unconditionally. this is applicable to frext profiles only
* bit 0 indicates strong intra smoothing enabled/disabled
*
* @returns
*
* @remarks
@ -645,7 +640,7 @@ void ihevc_intra_pred_ref_filtering(UWORD8 *pu1_src,
WORD32 nt,
UWORD8 *pu1_dst,
WORD32 mode,
WORD32 intra_smoothing_flags)
WORD32 strong_intra_smoothing_enable_flag)
{
WORD32 filter_flag;
WORD32 i; /* Generic indexing variable */
@ -656,11 +651,9 @@ void ihevc_intra_pred_ref_filtering(UWORD8 *pu1_src,
WORD32 abs_cond_top_flag = 0;
/*WORD32 dc_val = 1 << (BIT_DEPTH - 5);*/
WORD32 dc_val = 1 << (8 - 5);
WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3);
WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1;
//WORD32 strong_intra_smoothing_enable_flag = 1;
filter_flag = intra_smoothing_disabled ?
0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)));
filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2));
if(0 == filter_flag)
{
if(pu1_src == pu1_dst)
@ -950,8 +943,8 @@ void ihevc_intra_pred_luma_dc(UWORD8 *pu1_ref,
* @param[in] nt
* integer Transform Block size
*
* @param[in] disable_boundary_filter
* disable boundary filtering
* @param[in] mode
* integer intraprediction mode
*
* @returns
*
@ -967,16 +960,17 @@ void ihevc_intra_pred_luma_horz(UWORD8 *pu1_ref,
UWORD8 *pu1_dst,
WORD32 dst_strd,
WORD32 nt,
WORD32 disable_boundary_filter)
WORD32 mode)
{
WORD32 row, col;
WORD32 two_nt;
WORD16 s2_predpixel;
UNUSED(mode);
UNUSED(src_strd);
two_nt = 2 * nt;
if(nt == 32 || disable_boundary_filter)
if(nt == 32)
{
for(row = 0; row < nt; row++)
for(col = 0; col < nt; col++)
@ -1029,8 +1023,8 @@ void ihevc_intra_pred_luma_horz(UWORD8 *pu1_ref,
* @param[in] nt
* integer Transform Block size
*
* @param[in] disable_boundary_filter
* disable boundary filtering
* @param[in] mode
* integer intraprediction mode
*
* @returns
*
@ -1046,14 +1040,15 @@ void ihevc_intra_pred_luma_ver(UWORD8 *pu1_ref,
UWORD8 *pu1_dst,
WORD32 dst_strd,
WORD32 nt,
WORD32 disable_boundary_filter)
WORD32 mode)
{
WORD32 row, col;
WORD16 s2_predpixel;
WORD32 two_nt = 2 * nt;
UNUSED(mode);
UNUSED(src_strd);
if(nt == 32 || disable_boundary_filter)
if(nt == 32)
{
/* Replication to next columns*/
for(row = 0; row < nt; row++)

File diff suppressed because it is too large Load diff

View file

@ -1,108 +0,0 @@
/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
* ihevc_itrans_res.h
*
* @brief
* Functions declarations for inverse transform
*
* @author
* Ittiam
*
* @remarks
* None
*
*******************************************************************************
*/
/* Include guard */
#ifndef _IHEVC_ITRANS_RES_H_
#define _IHEVC_ITRANS_RES_H_
/* Function types for the inverse-transform routines declared below.      */
/* Unlike a recon routine, the output pointer pi2_dst is WORD16 and no     */
/* prediction buffer is passed.                                            */
/* NOTE(review): presumably these write the residual only, leaving the     */
/* addition of the prediction to the caller - confirm in the .c file       */

/* 4x4 "ttype1" variant; same signature as the plain 4x4 type below */
typedef void ihevc_itrans_res_4x4_ttype1_ft(WORD16 *pi2_src,
WORD16 *pi2_tmp,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 zero_cols,
WORD32 zero_rows);
/* 4x4 block */
typedef void ihevc_itrans_res_4x4_ft(WORD16 *pi2_src,
WORD16 *pi2_tmp,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 zero_cols,
WORD32 zero_rows);
/* DC-only variant: takes a single coefficient value and log2 of the      */
/* transform size instead of a source buffer. Note the ihevcd_ prefix,    */
/* which differs from the ihevc_ prefix of the other types in this file   */
typedef void ihevcd_itrans_res_dc_ft(WORD16 *pi2_dst,
WORD32 dst_strd,
WORD32 log2_trans_size,
WORD16 i2_coeff_value);
/* 8x8 block */
typedef void ihevc_itrans_res_8x8_ft(WORD16 *pi2_src,
WORD16 *pi2_tmp,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 zero_cols,
WORD32 zero_rows);
/* 16x16 block */
typedef void ihevc_itrans_res_16x16_ft(WORD16 *pi2_src,
WORD16 *pi2_tmp,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 zero_cols,
WORD32 zero_rows);
/* 32x32 block */
typedef void ihevc_itrans_res_32x32_ft(WORD16 *pi2_src,
WORD16 *pi2_tmp,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 zero_cols,
WORD32 zero_rows);
/* Fixed-size 4x4 residual operation: no scratch buffer, no zero_rows */
typedef void ihevc_res_4x4_transform(WORD16 *pi2_src,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 zero_cols);
/* Size-generic residual operation; the block size is passed in trans_size */
typedef void ihevc_res_nxn_transform(WORD16 *pi2_src,
WORD16 *pi2_dst,
WORD32 src_strd,
WORD32 dst_strd,
WORD32 trans_size,
WORD32 zero_cols);
/* C function declarations */
/* Each line declares a function whose signature is the function type     */
/* named on its left                                                       */
ihevc_itrans_res_4x4_ttype1_ft ihevc_itrans_res_4x4_ttype1;
ihevc_itrans_res_4x4_ft ihevc_itrans_res_4x4;
ihevcd_itrans_res_dc_ft ihevcd_itrans_res_dc;
ihevc_itrans_res_8x8_ft ihevc_itrans_res_8x8;
ihevc_itrans_res_16x16_ft ihevc_itrans_res_16x16;
ihevc_itrans_res_32x32_ft ihevc_itrans_res_32x32;
/* Residual operations that share the two generic types above */
ihevc_res_4x4_transform ihevc_res_4x4_rotate;
ihevc_res_nxn_transform ihevc_res_nxn_copy;
ihevc_res_nxn_transform ihevc_res_nxn_rdpcm_horz;
ihevc_res_nxn_transform ihevc_res_nxn_rdpcm_vert;
#endif /*_IHEVC_ITRANS_RES_H_*/

View file

@ -1420,28 +1420,6 @@ typedef struct
*/
UWORD32 b3_chroma_intra_mode_idx : 3;
#ifdef ENABLE_MAIN_REXT_PROFILE
/**
* Cb CCP alpha magnitude
*/
UWORD32 b3_cb_log2_res_scale_abs_plus1 : 3;
/**
* Cb CCP alpha sign
*/
UWORD32 b1_cb_log2_res_sign : 1;
/**
* Cr CCP alpha magnitude
*/
UWORD32 b3_cr_log2_res_scale_abs_plus1 : 3;
/**
* Cr CCP alpha sign
*/
UWORD32 b1_cr_log2_res_sign : 1;
#endif
}tu_t;
/**
@ -2503,36 +2481,6 @@ typedef struct
*/
vui_t s_vui_parameters;
/**
* sps_extension_present_flag
*/
WORD8 i1_sps_extension_present_flag;
/**
* sps_range_extension_present_flag
*/
WORD8 i1_sps_range_extension_flag;
/**
* sps_multilayer_extension_present_flag
*/
WORD8 i1_sps_multilayer_extension_flag;
/**
* sps_3d_extension_present_flag
*/
WORD8 i1_sps_3d_extension_flag;
/**
* sps_scc_extension_present_flag
*/
WORD8 i1_sps_scc_extension_flag;
/**
* sps_extension_reserved
*/
WORD8 i1_sps_extension_4bits;
/**
* Log2(CTB Size) in luma units
*/
@ -2595,8 +2543,8 @@ typedef struct
/* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */
/* Intra 16x16 Y, 16x16 U, 16x16 V */
/* Inter 16x16 Y, 16x16 U, 16x16 V */
/* Intra 32x32 Y, 32x32 U, 32x32 V */
/* Inter 32x32 Y, 32x32 U, 32x32 V */
/* Intra 32x32 Y */
/* Inter 32x32 Y */
/*************************************************************************/
WORD16 *pi2_scaling_mat;
@ -2638,9 +2586,9 @@ typedef struct
WORD8 i1_use_high_precision_pred_wt;
/**
* persistent_rice_adaptation_enabled_flag
* fast_rice_adaptation_enabled_flag
*/
WORD8 i1_persistent_rice_adaptation_enabled_flag;
WORD8 i1_fast_rice_adaptation_enabled_flag;
/**
* cabac_bypass_alignment_enabled_flag
@ -2671,8 +2619,8 @@ typedef struct
/* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */
/* Intra 16x16 Y, 16x16 U, 16x16 V */
/* Inter 16x16 Y, 16x16 U, 16x16 V */
/* Intra 32x32 Y, 32x32 U, 32x32 V */
/* Inter 32x32 Y, 32x32 U, 32x32 V */
/* Intra 32x32 Y */
/* Inter 32x32 Y */
/*************************************************************************/
WORD16 *pi2_scaling_mat;
@ -2874,36 +2822,6 @@ typedef struct
*/
WORD8 i1_slice_extension_present_flag;
/**
* pps_extension_present_flag
*/
WORD8 i1_pps_extension_present_flag;
/**
* pps_range_extension_present_flag
*/
WORD8 i1_pps_range_extension_flag;
/**
* pps_multilayer_extension_present_flag
*/
WORD8 i1_pps_multilayer_extension_flag;
/**
* pps_3d_extension_present_flag
*/
WORD8 i1_pps_3d_extension_flag;
/**
* pps_scc_extension_present_flag
*/
WORD8 i1_pps_scc_extension_flag;
/**
* pps_extension_reserved
*/
WORD8 i1_pps_extension_4bits;
/**
* scaling_list_dc_coef_minus8
*/
@ -2924,7 +2842,7 @@ typedef struct
/**
* log2_max_transform_skip_block_size_minus2
*/
WORD8 i1_log2_max_transform_skip_block_size_minus2;
WORD32 i4_log2_max_transform_skip_block_size_minus2;
/**
* cross_component_prediction_enabled_flag

View file

@ -877,12 +877,6 @@ const WORD16 g_ai2_ihevc_trans_intr_4[4][8] =
const UWORD8 IHEVCE_CHROMA_SHUFFLEMASK_HBD[8] = { 0x00, 0x01, 0x04, 0x05,
0x08, 0x09, 0x0C, 0x0D };
const UWORD32 gau4_ihevcd_4_bit_reverse[] =
{
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15
};
#ifndef DISABLE_AVX2
const WORD32 g_ai4_ihevc_trans_8_intr_avx2[7][8] =
{ /* 4*32 = 128 bit */

View file

@ -116,6 +116,4 @@ extern const WORD16 g_ai2_ihevc_trans_intr_4[4][8];
extern const UWORD8 IHEVCE_CHROMA_SHUFFLEMASK_HBD[8];
extern MEM_ALIGN16 const UWORD32 gau4_ihevcd_4_bit_reverse[16];
#endif /*_IHEVC_TRANS_TABLES_H_*/

View file

@ -91,11 +91,6 @@
* @param[in] mode
* integer intraprediction mode
*
* @param[in] intra_smoothing_flags
* integer bit 3 indicates if intra smoothing is enabled/disabled
* unconditionally. this is applicable to frext profiles only
* bit 0 indicates strong intra smoothing enabled/disabled
*
* @returns
*
* @remarks
@ -109,7 +104,7 @@ void ihevc_intra_pred_ref_filtering_sse42(UWORD8 *pu1_src,
WORD32 nt,
UWORD8 *pu1_dst,
WORD32 mode,
WORD32 intra_smoothing_flags)
WORD32 strong_intra_smoothing_enable_flag)
{
WORD32 filter_flag;
WORD32 i; /* Generic indexing variable */
@ -122,10 +117,11 @@ void ihevc_intra_pred_ref_filtering_sse42(UWORD8 *pu1_src,
__m128i src_temp1, src_temp2, src_temp3, src_temp7;
__m128i src_temp4, src_temp5, src_temp6, src_temp8;
WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3);
WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1;
filter_flag = intra_smoothing_disabled ?
0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)));
//WORD32 strong_intra_smoothing_enable_flag = 1;
filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2));
if(0 == filter_flag)
{
if(pu1_src == pu1_dst)

View file

@ -377,11 +377,6 @@ void ihevc_intra_pred_luma_ref_substitution_ssse3(UWORD8 *pu1_top_left,
* @param[in] mode
* integer intraprediction mode
*
* @param[in] intra_smoothing_flags
* integer bit 3 indicates if intra smoothing is enabled/disabled
* unconditionally. this is applicable to frext profiles only
* bit 0 indicates strong intra smoothing enabled/disabled
*
* @returns
*
* @remarks
@ -394,7 +389,7 @@ void ihevc_intra_pred_ref_filtering_ssse3(UWORD8 *pu1_src,
WORD32 nt,
UWORD8 *pu1_dst,
WORD32 mode,
WORD32 intra_smoothing_flags)
WORD32 strong_intra_smoothing_enable_flag)
{
WORD32 filter_flag;
WORD32 i; /* Generic indexing variable */
@ -407,10 +402,9 @@ void ihevc_intra_pred_ref_filtering_ssse3(UWORD8 *pu1_src,
__m128i src_temp1, src_temp2, src_temp3, src_temp7;
__m128i src_temp4, src_temp5, src_temp6, src_temp8;
WORD32 intra_smoothing_disabled = (intra_smoothing_flags >> 3);
WORD32 strong_intra_smoothing_enable_flag = intra_smoothing_flags & 1;
filter_flag = intra_smoothing_disabled ?
0 : (gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)));
//WORD32 strong_intra_smoothing_enable_flag = 1;
filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2));
if(0 == filter_flag)
{
if(pu1_src == pu1_dst)

View file

@ -0,0 +1,453 @@
@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@* ihevcd_fmt_conv_420sp_to_rgba8888.s
@*
@* @brief
@* contains function definitions for format conversions
@*
@* @author
@* ittiam
@*
@* @par list of functions:
@*
@*
@* @remarks
@* none
@*
@*******************************************************************************/
.equ DO1STROUNDING, 0
@ ARM
@
@ PRESERVE8
.text
.p2align 2
@/*****************************************************************************
@* *
@* Function Name : ihevcd_fmt_conv_420sp_to_rgba8888() *
@* *
@* Description : This function converts the image from YUV420 *
@* semi-planar format to RGBA8888. The function can be *
@* invoked at the MB level. *
@* *
@* Arguments : R0 pubY *
@* R1 pubUV *
@* R2 pusRGB *
@* R3 usWidth *
@* [R13 #0] usHeight *
@* [R13 #4] usStrideY *
@* [R13 #8] usStrideUV *
@* [R13 #12] usStrideRGB *
@* (stack offsets are relative to R13 on entry; the *
@* code reads them at +104 after its own pushes) *
@* *
@* Values Returned : None *
@* *
@* Register Usage : R0 - R14 *
@* *
@* Stack Usage : 104 Bytes *
@* *
@* Interruptibility : Interruptible *
@* *
@* Known Limitations *
@* Assumptions: Image Width: Assumed to be multiple of 16 and *
@* greater than or equal to 16 *
@* Image Height: Assumed to be even. *
@* *
@* Revision History : *
@* DD MM YYYY Author(s) Changes (Describe the changes made) *
@* 07 06 2010 Varshita Draft *
@* 07 06 2010 Naveen Kr T Completed *
@* 05 08 2013 Naveen K P Modified for HEVC *
@* 30 10 2018 Saurabh Sood Store D registers to stack *
@*****************************************************************************/
.global ihevcd_fmt_conv_420sp_to_rgba8888_a9q
.type ihevcd_fmt_conv_420sp_to_rgba8888_a9q, function
ihevcd_fmt_conv_420sp_to_rgba8888_a9q:
@// push the registers on the stack
@// 40 bytes (R4-R12,LR) + 64 bytes (d8-d15) = 104 bytes are pushed, so the
@// caller's stack arguments start at [sp,#104] below
STMFD SP!,{R4-R12,LR}
VPUSH {d8-d15}
@//REGISTER USAGE AS SEEN IN THE CODE BELOW:
@//R0 - Y PTR (ROW 1)
@//R1 - UV PTR
@//R2 - RGB PTR (ROW 1)
@//R3 - PIC WIDTH
@//R5 - PIC HT, THEN HEIGHT LOOP COUNT
@//R6 - STRIDE Y, THEN WIDTH LOOP COUNT
@//R7 - STRIDE UV, THEN Y PTR (ROW 2)
@//R8 - RGB PTR (ROW 2)
@//R9 - STRIDE RGB
@//R10,R11,R14 - LUMA, UV AND RGB END-OF-ROW OFFSETS
@//TWO ROWS (R0 AND R7) ARE PROCESSED PER HEIGHT LOOP ITERATION
@//THE FOUR CONSTANTS ARE:
@//C1=0x3311,C2=0xF379,C3=0xE5F8,C4=0x4092
@PLD [R0]
@PLD [R1]
@PLD [R2]
@/* can be loaded from a defined const type */
MOVW R10,#0x3311
VMOV.16 D0[0],R10 @//C1
MOVW R10,#0xF379
VMOV.16 D0[1],R10 @//C2
MOVW R10,#0xE5F8
VMOV.16 D0[2],R10 @//C3
MOVW R10,#0x4092
VMOV.16 D0[3],R10 @//C4
@//LOAD CONSTANT 128 INTO A CORTEX REGISTER
MOV R10,#128
VDUP.8 D1,R10
@//D0 HAS C1-C2-C3-C4
@// load other parameters from stack
LDR R5,[sp,#104] @// height
@LDR R4,[sp,#44]
LDR R6,[sp,#108] @// luma stride
LDR R7,[sp,#112] @// uv stride
@LDR R8,[sp,#52]
LDR R9,[sp,#116] @// rgb stride
@// calculate offsets, offset = stride - width
SUB R10,R6,R3 @// luma offset
SUB R11,R7,R3
@, LSR #1 @// u offset
@SUB R12,R8,R3, LSR #1 @// v offset
SUB R14,R9,R3 @// rgb offset in pixels
@// calculate height loop count
MOV R5,R5, LSR #1 @// height_cnt = height / 2 (two rows per iteration)
@// create next row pointers for rgb and luma data
ADD R7,R0,R6 @// luma_next_row = luma + luma_stride
ADD R8,R2,R9,LSL #2 @// rgb_next_row = rgb + rgb_stride (4 bytes per pixel)
LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP:
@//LOAD VALUES OF U&V AND COMPUTE THE R,G,B WEIGHT VALUES.
VLD1.8 {D2,D3},[R1]! @//LOAD 16 BYTES OF INTERLEAVED UV (8 U/V PAIRS)
@//VLD1.8 {D3},[R2]! @//LOAD 8 VALUES OF V
@// calculate width loop count
MOV R6,R3, LSR #4 @// width_cnt = width / 16
@//COMPUTE THE ACTUAL RGB VALUES,WE CAN DO TWO ROWS AT A TIME
@//LOAD VALUES OF Y 8-BIT VALUES
VLD2.8 {D30,D31},[R0]! @//D30 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row 1
@//D31 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15
VLD2.8 {D28,D29},[R7]! @//D28 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row2
@//D29 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15
@// the last 16 pixels of each row pair are handled after the loop,
@// without the look-ahead loads
SUBS R6,R6,#1
BEQ LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP
LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP:
@VMOV.I8 Q1,#128
VUZP.8 D2,D3 @//DE-INTERLEAVE: D2 = U, D3 = V
@//NEED TO SUBTRACT (U-128) AND (V-128)
@//(D2-D1),(D3-D1)
VSUBL.U8 Q2,D2,D1 @//(U-128)
VSUBL.U8 Q3,D3,D1 @//(V-128)
@//LOAD VALUES OF U&V FOR THE NEXT ITERATION
VLD1.8 {D2,D3},[R1]! @//LOAD NEXT 8 U/V PAIRS
@//VLD1.8 {D3},[R2]! @//LOAD 8 VALUES OF V
@PLD [R0]
PLD [R1]
@//NEED TO MULTIPLY Q2,Q3 WITH THE COEFFICIENTS
VMULL.S16 Q4,D4,D0[3] @//(U-128)*C4 FOR B
VMULL.S16 Q5,D5,D0[3] @//(U-128)*C4 FOR B
VMULL.S16 Q10,D6,D0[0] @//(V-128)*C1 FOR R
VMULL.S16 Q11,D7,D0[0] @//(V-128)*C1 FOR R
VMULL.S16 Q6,D4,D0[1] @//(U-128)*C2 FOR G
VMLAL.S16 Q6,D6,D0[2] @//Q6 = (U-128)*C2 + (V-128)*C3
VMULL.S16 Q7,D5,D0[1] @//(U-128)*C2 FOR G
VMLAL.S16 Q7,D7,D0[2] @//Q7 = (U-128)*C2 + (V-128)*C3
@//NARROW RIGHT SHIFT BY 13 FOR R&B
VQSHRN.S32 D8,Q4,#13 @//D8 = (U-128)*C4>>13 4 16-BIT VALUES
VQSHRN.S32 D9,Q5,#13 @//D9 = (U-128)*C4>>13 4 16-BIT VALUES
@//Q4 - WEIGHT FOR B
@//NARROW RIGHT SHIFT BY 13 FOR R&B
VQSHRN.S32 D10,Q10,#13 @//D10 = (V-128)*C1>>13 4 16-BIT VALUES
VQSHRN.S32 D11,Q11,#13 @//D11 = (V-128)*C1>>13 4 16-BIT VALUES
@//Q5 - WEIGHT FOR R
@//NARROW RIGHT SHIFT BY 13 FOR G
VQSHRN.S32 D12,Q6,#13 @//D12 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES
VQSHRN.S32 D13,Q7,#13 @//D13 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES
@//Q6 - WEIGHT FOR G
@//ROW 1: D30 HOLDS THE EVEN PIXELS, D31 THE ODD PIXELS
VADDW.U8 Q7,Q4,D30 @//Q7 - HAS Y + B
VADDW.U8 Q8,Q5,D30 @//Q8 - HAS Y + R
VADDW.U8 Q9,Q6,D30 @//Q9 - HAS Y + G
VADDW.U8 Q10,Q4,D31 @//Q10 - HAS Y + B
VADDW.U8 Q11,Q5,D31 @//Q11 - HAS Y + R
VADDW.U8 Q12,Q6,D31 @//Q12 - HAS Y + G
@//SATURATE TO 8 BIT AND INTERLEAVE AS B,G,R,0 PER PIXEL
VQMOVUN.S16 D14,Q7
VQMOVUN.S16 D15,Q9
VQMOVUN.S16 D16,Q8
VMOV.I8 D17,#0 @//FOURTH BYTE OF EACH PIXEL IS ZERO
VZIP.8 D14,D15
VZIP.8 D16,D17
VZIP.16 Q7,Q8
VQMOVUN.S16 D20,Q10
VQMOVUN.S16 D21,Q12
VQMOVUN.S16 D22,Q11
VMOV.I8 D23,#0
VZIP.8 D20,D21
VZIP.8 D22,D23
VZIP.16 Q10,Q11
@//MERGE EVEN-PIXEL (Q7,Q8) AND ODD-PIXEL (Q10,Q11) RESULTS INTO PIXEL ORDER
VZIP.32 Q7,Q10
VZIP.32 Q8,Q11
VST1.32 D14,[R2]!
VST1.32 D15,[R2]!
VST1.32 D20,[R2]!
VST1.32 D21,[R2]!
VST1.32 D16,[R2]!
VST1.32 D17,[R2]!
VST1.32 D22,[R2]!
VST1.32 D23,[R2]!
@//16 PIXELS (64 BYTES) OF ROW 1 HAVE BEEN STORED
@//ROW 2: ADD THE SAME CHROMA WEIGHTS TO THE ROW 2 LUMA (D28,D29)
VADDW.U8 Q7,Q4,D28 @//Q7 - HAS Y + B
VADDW.U8 Q8,Q5,D28 @//Q8 - HAS Y + R
VADDW.U8 Q9,Q6,D28 @//Q9 - HAS Y + G
VADDW.U8 Q10,Q4,D29 @//Q10 - HAS Y + B
VADDW.U8 Q11,Q5,D29 @//Q11 - HAS Y + R
VADDW.U8 Q12,Q6,D29 @//Q12 - HAS Y + G
@//COMPUTE THE ACTUAL RGB VALUES,WE CAN DO TWO ROWS AT A TIME
@//LOAD VALUES OF Y 8-BIT VALUES FOR THE NEXT ITERATION
VLD2.8 {D30,D31},[R0]! @//D30 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row 1
@//D31 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15
VLD2.8 {D28,D29},[R7]! @//D28 - Y0,Y2,Y4,Y6,Y8,Y10,Y12,Y14 row2
@//D29 - Y1,Y3,Y5,Y7,Y9,Y11,Y13,Y15
PLD [R0]
PLD [R7]
VQMOVUN.S16 D14,Q7
VQMOVUN.S16 D15,Q9
VQMOVUN.S16 D16,Q8
VMOV.I8 D17,#0
VZIP.8 D14,D15
VZIP.8 D16,D17
VZIP.16 Q7,Q8
VQMOVUN.S16 D20,Q10
VQMOVUN.S16 D21,Q12
VQMOVUN.S16 D22,Q11
VMOV.I8 D23,#0
VZIP.8 D20,D21
VZIP.8 D22,D23
VZIP.16 Q10,Q11
VZIP.32 Q7,Q10
VZIP.32 Q8,Q11
VST1.32 D14,[R8]!
VST1.32 D15,[R8]!
VST1.32 D20,[R8]!
VST1.32 D21,[R8]!
VST1.32 D16,[R8]!
VST1.32 D17,[R8]!
VST1.32 D22,[R8]!
VST1.32 D23,[R8]!
SUBS R6,R6,#1 @// width_cnt -= 1
BNE LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP
LABEL_YUV420SP_TO_RGB8888_WIDTH_LOOP_SKIP:
@// LAST 16 PIXELS OF THE ROW PAIR: SAME COMPUTATION AS THE LOOP BODY BUT
@// WITHOUT LOADING Y/UV DATA FOR A FURTHER ITERATION
@VMOV.I8 Q1,#128
VUZP.8 D2,D3
@//NEED TO SUBTRACT (U-128) AND (V-128)
@//(D2-D1),(D3-D1)
VSUBL.U8 Q2,D2,D1 @//(U-128)
VSUBL.U8 Q3,D3,D1 @//(V-128)
@//NEED TO MULTIPLY Q2,Q3 WITH THE COEFFICIENTS
VMULL.S16 Q4,D4,D0[3] @//(U-128)*C4 FOR B
VMULL.S16 Q5,D5,D0[3] @//(U-128)*C4 FOR B
VMULL.S16 Q10,D6,D0[0] @//(V-128)*C1 FOR R
VMULL.S16 Q11,D7,D0[0] @//(V-128)*C1 FOR R
VMULL.S16 Q6,D4,D0[1] @//(U-128)*C2 FOR G
VMLAL.S16 Q6,D6,D0[2] @//Q6 = (U-128)*C2 + (V-128)*C3
VMULL.S16 Q7,D5,D0[1] @//(U-128)*C2 FOR G
VMLAL.S16 Q7,D7,D0[2] @//Q7 = (U-128)*C2 + (V-128)*C3
@//NARROW RIGHT SHIFT BY 13 FOR R&B
VQSHRN.S32 D8,Q4,#13 @//D8 = (U-128)*C4>>13 4 16-BIT VALUES
VQSHRN.S32 D9,Q5,#13 @//D9 = (U-128)*C4>>13 4 16-BIT VALUES
@//Q4 - WEIGHT FOR B
@//NARROW RIGHT SHIFT BY 13 FOR R&B
VQSHRN.S32 D10,Q10,#13 @//D10 = (V-128)*C1>>13 4 16-BIT VALUES
VQSHRN.S32 D11,Q11,#13 @//D11 = (V-128)*C1>>13 4 16-BIT VALUES
@//Q5 - WEIGHT FOR R
@//NARROW RIGHT SHIFT BY 13 FOR G
VQSHRN.S32 D12,Q6,#13 @//D12 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES
VQSHRN.S32 D13,Q7,#13 @//D13 = [(U-128)*C2 + (V-128)*C3]>>13 4 16-BIT VALUES
@//Q6 - WEIGHT FOR G
VADDW.U8 Q7,Q4,D30 @//Q7 - HAS Y + B
VADDW.U8 Q8,Q5,D30 @//Q8 - HAS Y + R
VADDW.U8 Q9,Q6,D30 @//Q9 - HAS Y + G
VADDW.U8 Q10,Q4,D31 @//Q10 - HAS Y + B
VADDW.U8 Q11,Q5,D31 @//Q11 - HAS Y + R
VADDW.U8 Q12,Q6,D31 @//Q12 - HAS Y + G
VQMOVUN.S16 D14,Q7
VQMOVUN.S16 D15,Q9
VQMOVUN.S16 D16,Q8
VMOV.I8 D17,#0
VZIP.8 D14,D15
VZIP.8 D16,D17
VZIP.16 Q7,Q8
VQMOVUN.S16 D20,Q10
VQMOVUN.S16 D21,Q12
VQMOVUN.S16 D22,Q11
VMOV.I8 D23,#0
VZIP.8 D20,D21
VZIP.8 D22,D23
VZIP.16 Q10,Q11
VZIP.32 Q7,Q10
VZIP.32 Q8,Q11
VST1.32 D14,[R2]!
VST1.32 D15,[R2]!
VST1.32 D20,[R2]!
VST1.32 D21,[R2]!
VST1.32 D16,[R2]!
VST1.32 D17,[R2]!
VST1.32 D22,[R2]!
VST1.32 D23,[R2]!
@//16 PIXELS (64 BYTES) OF ROW 1 HAVE BEEN STORED
@//ROW 2: ADD THE SAME CHROMA WEIGHTS TO THE ROW 2 LUMA (D28,D29)
VADDW.U8 Q7,Q4,D28 @//Q7 - HAS Y + B
VADDW.U8 Q8,Q5,D28 @//Q8 - HAS Y + R
VADDW.U8 Q9,Q6,D28 @//Q9 - HAS Y + G
VADDW.U8 Q10,Q4,D29 @//Q10 - HAS Y + B
VADDW.U8 Q11,Q5,D29 @//Q11 - HAS Y + R
VADDW.U8 Q12,Q6,D29 @//Q12 - HAS Y + G
VQMOVUN.S16 D14,Q7
VQMOVUN.S16 D15,Q9
VQMOVUN.S16 D16,Q8
VMOV.I8 D17,#0
VZIP.8 D14,D15
VZIP.8 D16,D17
VZIP.16 Q7,Q8
VQMOVUN.S16 D20,Q10
VQMOVUN.S16 D21,Q12
VQMOVUN.S16 D22,Q11
VMOV.I8 D23,#0
VZIP.8 D20,D21
VZIP.8 D22,D23
VZIP.16 Q10,Q11
VZIP.32 Q7,Q10
VZIP.32 Q8,Q11
VST1.32 D14,[R8]!
VST1.32 D15,[R8]!
VST1.32 D20,[R8]!
VST1.32 D21,[R8]!
VST1.32 D16,[R8]!
VST1.32 D17,[R8]!
VST1.32 D22,[R8]!
VST1.32 D23,[R8]!
@// Adjust the address pointers
@// R7/R8 sit at the end of row 2; adding the offset gives the start of the
@// next row pair, and the row after that is one further stride away
ADD R0,R7,R10 @// luma = luma_next + offset
ADD R2,R8,R14,LSL #2 @// rgb = rgb_next + offset
ADD R7,R0,R3 @// luma_next = luma + width
ADD R8,R2,R3,LSL #2 @// rgb_next_row = rgb + width
ADD R1,R1,R11 @// adjust uv pointer
@ADD R2,R2,R12 @// adjust v pointer
ADD R7,R7,R10 @// luma_next = luma + width + offset (because of register crunch)
ADD R8,R8,R14,LSL #2 @// rgb_next_row = rgb + width + offset
SUBS R5,R5,#1 @// height_cnt -= 1
BNE LABEL_YUV420SP_TO_RGB8888_HEIGHT_LOOP
@//POP THE REGISTERS
VPOP {d8-d15}
LDMFD SP!,{R4-R12,PC}
.section .note.GNU-stack,"",%progbits

View file

@ -58,6 +58,10 @@
#include "ihevcd_function_selector.h"
#include "ihevcd_structs.h"
void ihevcd_init_function_ptr_neonintr(codec_t *ps_codec);
void ihevcd_init_function_ptr_noneon(codec_t *ps_codec);
void ihevcd_init_function_ptr_a9q(codec_t *ps_codec);
void ihevcd_init_function_ptr_av8(codec_t *ps_codec);
void ihevcd_init_function_ptr(void *pv_codec)
{
codec_t *ps_codec = (codec_t *)pv_codec;
@ -67,11 +71,11 @@ void ihevcd_init_function_ptr(void *pv_codec)
{
#ifndef DISABLE_NEONINTR
case ARCH_ARM_NEONINTR:
ihevcd_init_function_ptr_neonintr(&ps_codec->s_func_selector);
ihevcd_init_function_ptr_neonintr(ps_codec);
break;
#endif
case ARCH_ARM_NONEON:
ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector);
ihevcd_init_function_ptr_noneon(ps_codec);
break;
default:
case ARCH_ARM_A5:
@ -80,9 +84,9 @@ void ihevcd_init_function_ptr(void *pv_codec)
case ARCH_ARM_A15:
case ARCH_ARM_A9Q:
#ifndef DISABLE_NEON
ihevcd_init_function_ptr_a9q(&ps_codec->s_func_selector);
ihevcd_init_function_ptr_a9q(ps_codec);
#else
ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector);
ihevcd_init_function_ptr_noneon(ps_codec);
#endif
break;
}
@ -102,17 +106,12 @@ void ihevcd_init_function_ptr(void *pv_codec)
switch(ps_codec->e_processor_arch)
{
case ARCH_ARM_NONEON:
ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector);
ihevcd_init_function_ptr_noneon(ps_codec);
break;
case ARCH_ARMV8_GENERIC:
default:
#ifdef DARWIN
ihevcd_init_function_ptr_noneon(&ps_codec->s_func_selector);
ihevcd_init_function_ptr_av8(ps_codec);
break;
#else
ihevcd_init_function_ptr_av8(&ps_codec->s_func_selector);
break;
#endif
}
#endif
}

View file

@ -54,113 +54,107 @@
#include "ihevc_dpb_mgr.h"
#include "ihevc_error.h"
#include "ihevcd_defs.h"
#include "ihevcd_function_selector.h"
#include "ihevcd_structs.h"
void ihevcd_init_function_ptr_a9q(func_selector_t *ps_func_selector)
void ihevcd_init_function_ptr_a9q(codec_t *ps_codec)
{
ps_func_selector->ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_a9q;
ps_func_selector->ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_a9q;
ps_func_selector->ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_a9q;
ps_func_selector->ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_a9q;
ps_func_selector->ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_a9q;
ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_a9q;
ps_func_selector->ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_a9q;
ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_a9q;
ps_func_selector->ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_a9q;
ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_a9q;
ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_a9q;
ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_a9q;
ps_func_selector->ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_a9q;
ps_func_selector->ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_a9q;
ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_a9q;
ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_a9q;
ps_func_selector->ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_a9q;
ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_a9q;
ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_a9q;
ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_a9q;
ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution_a9q;
ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution_a9q;
ps_func_selector->ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble;
ps_func_selector->ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_neonintr;
ps_func_selector->ihevc_intra_pred_chroma_ref_filtering_fptr = &ihevc_intra_pred_chroma_ref_filtering;
ps_func_selector->ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_a9q;
ps_func_selector->ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_a9q;
ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_a9q;
ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_a9q;
ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_a9q;
ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_a9q;
ps_func_selector->ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_a9q;
ps_func_selector->ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_a9q;
ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_a9q;
ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_a9q;
ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_a9q;
ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_a9q;
ps_func_selector->ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_a9q;
ps_func_selector->ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_a9q;
ps_func_selector->ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_a9q;
ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_a9q;
ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_a9q;
ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_a9q;
ps_func_selector->ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_a9q;
ps_func_selector->ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_a9q;
ps_func_selector->ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1;
ps_func_selector->ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4;
ps_func_selector->ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8;
ps_func_selector->ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16;
ps_func_selector->ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32;
ps_func_selector->ihevc_itrans_res_4x4_ttype1_fptr = &ihevc_itrans_res_4x4_ttype1;
ps_func_selector->ihevc_itrans_res_4x4_fptr = &ihevc_itrans_res_4x4;
ps_func_selector->ihevc_itrans_res_8x8_fptr = &ihevc_itrans_res_8x8;
ps_func_selector->ihevc_itrans_res_16x16_fptr = &ihevc_itrans_res_16x16;
ps_func_selector->ihevc_itrans_res_32x32_fptr = &ihevc_itrans_res_32x32;
ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_a9q;
ps_func_selector->ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_a9q;
ps_func_selector->ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_a9q;
ps_func_selector->ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_a9q;
ps_func_selector->ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_a9q;
ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4;
ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8;
ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16;
ps_func_selector->ihevc_chroma_itrans_recon_32x32_fptr = &ihevc_chroma_itrans_recon_32x32;
ps_func_selector->ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1;
ps_func_selector->ihevc_recon_4x4_fptr = &ihevc_recon_4x4;
ps_func_selector->ihevc_recon_8x8_fptr = &ihevc_recon_8x8;
ps_func_selector->ihevc_recon_16x16_fptr = &ihevc_recon_16x16;
ps_func_selector->ihevc_recon_32x32_fptr = &ihevc_recon_32x32;
ps_func_selector->ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4;
ps_func_selector->ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8;
ps_func_selector->ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16;
ps_func_selector->ihevc_chroma_recon_32x32_fptr = &ihevc_chroma_recon_32x32;
ps_func_selector->ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8_a9q;
ps_func_selector->ihevc_memcpy_fptr = &ihevc_memcpy_a9q;
ps_func_selector->ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8_a9q;
ps_func_selector->ihevc_memset_fptr = &ihevc_memset_a9q;
ps_func_selector->ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8_a9q;
ps_func_selector->ihevc_memset_16bit_fptr = &ihevc_memset_16bit_a9q;
ps_func_selector->ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma_a9q;
ps_func_selector->ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma_a9q;
ps_func_selector->ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma_a9q;
ps_func_selector->ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma_a9q;
ps_func_selector->ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_a9q;
ps_func_selector->ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_a9q;
ps_func_selector->ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_a9q;
ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_neonintr;
ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_neonintr;
ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_neonintr;
ps_func_selector->ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_a9q;
ps_func_selector->ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_a9q;
ps_func_selector->ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_a9q;
ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_a9q;
ps_func_selector->ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_a9q;
ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_a9q;
ps_func_selector->ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_a9q;
ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_a9q;
ps_func_selector->ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_a9q;
ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_a9q;
ps_func_selector->ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp;
ps_func_selector->ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_a9q;
ps_func_selector->ihevcd_fmt_conv_444sp_to_444p_fptr = &ihevcd_fmt_conv_444sp_to_444p;
ps_func_selector->ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_a9q;
ps_func_selector->ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_a9q;
ps_func_selector->ihevcd_itrans_res_dc_fptr = &ihevcd_itrans_res_dc;
ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr = &ihevc_deblk_chroma_horz_a9q;
ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr = &ihevc_deblk_chroma_vert_a9q;
ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr = &ihevc_deblk_luma_vert_a9q;
ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr = &ihevc_deblk_luma_horz_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = &ihevc_inter_pred_chroma_copy_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = &ihevc_inter_pred_chroma_copy_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = &ihevc_inter_pred_chroma_horz_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = &ihevc_inter_pred_chroma_horz_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = &ihevc_inter_pred_chroma_vert_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = &ihevc_inter_pred_chroma_vert_w16inp_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16inp_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = &ihevc_inter_pred_chroma_vert_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_fptr = &ihevc_inter_pred_luma_horz_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_fptr = &ihevc_inter_pred_luma_vert_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = &ihevc_inter_pred_luma_vert_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = &ihevc_inter_pred_luma_vert_w16inp_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_fptr = &ihevc_inter_pred_luma_copy_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = &ihevc_inter_pred_luma_copy_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = &ihevc_inter_pred_luma_horz_w16out_a9q;
ps_codec->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = &ihevc_inter_pred_luma_vert_w16inp_w16out_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = &ihevc_intra_pred_chroma_ref_substitution_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = &ihevc_intra_pred_luma_ref_substitution_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr = &ihevc_intra_pred_luma_ref_subst_all_avlble;
ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = &ihevc_intra_pred_ref_filtering_neonintr;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = &ihevc_intra_pred_chroma_dc_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = &ihevc_intra_pred_chroma_horz_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = &ihevc_intra_pred_chroma_mode2_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = &ihevc_intra_pred_chroma_mode_18_34_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = &ihevc_intra_pred_chroma_mode_27_to_33_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = &ihevc_intra_pred_chroma_mode_3_to_9_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = &ihevc_intra_pred_chroma_planar_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = &ihevc_intra_pred_chroma_ver_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = &ihevc_intra_pred_chroma_mode_11_to_17_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = &ihevc_intra_pred_chroma_mode_19_to_25_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = &ihevc_intra_pred_luma_mode_11_to_17_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = &ihevc_intra_pred_luma_mode_19_to_25_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_dc_fptr = &ihevc_intra_pred_luma_dc_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_horz_fptr = &ihevc_intra_pred_luma_horz_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = &ihevc_intra_pred_luma_mode2_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = &ihevc_intra_pred_luma_mode_18_34_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = &ihevc_intra_pred_luma_mode_27_to_33_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = &ihevc_intra_pred_luma_mode_3_to_9_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_planar_fptr = &ihevc_intra_pred_luma_planar_a9q;
ps_codec->s_func_selector.ihevc_intra_pred_luma_ver_fptr = &ihevc_intra_pred_luma_ver_a9q;
ps_codec->s_func_selector.ihevc_itrans_4x4_ttype1_fptr = &ihevc_itrans_4x4_ttype1;
ps_codec->s_func_selector.ihevc_itrans_4x4_fptr = &ihevc_itrans_4x4;
ps_codec->s_func_selector.ihevc_itrans_8x8_fptr = &ihevc_itrans_8x8;
ps_codec->s_func_selector.ihevc_itrans_16x16_fptr = &ihevc_itrans_16x16;
ps_codec->s_func_selector.ihevc_itrans_32x32_fptr = &ihevc_itrans_32x32;
ps_codec->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = &ihevc_itrans_recon_4x4_ttype1_a9q;
ps_codec->s_func_selector.ihevc_itrans_recon_4x4_fptr = &ihevc_itrans_recon_4x4_a9q;
ps_codec->s_func_selector.ihevc_itrans_recon_8x8_fptr = &ihevc_itrans_recon_8x8_a9q;
ps_codec->s_func_selector.ihevc_itrans_recon_16x16_fptr = &ihevc_itrans_recon_16x16_a9q;
ps_codec->s_func_selector.ihevc_itrans_recon_32x32_fptr = &ihevc_itrans_recon_32x32_a9q;
ps_codec->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = &ihevc_chroma_itrans_recon_4x4;
ps_codec->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = &ihevc_chroma_itrans_recon_8x8;
ps_codec->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = &ihevc_chroma_itrans_recon_16x16;
ps_codec->s_func_selector.ihevc_recon_4x4_ttype1_fptr = &ihevc_recon_4x4_ttype1;
ps_codec->s_func_selector.ihevc_recon_4x4_fptr = &ihevc_recon_4x4;
ps_codec->s_func_selector.ihevc_recon_8x8_fptr = &ihevc_recon_8x8;
ps_codec->s_func_selector.ihevc_recon_16x16_fptr = &ihevc_recon_16x16;
ps_codec->s_func_selector.ihevc_recon_32x32_fptr = &ihevc_recon_32x32;
ps_codec->s_func_selector.ihevc_chroma_recon_4x4_fptr = &ihevc_chroma_recon_4x4;
ps_codec->s_func_selector.ihevc_chroma_recon_8x8_fptr = &ihevc_chroma_recon_8x8;
ps_codec->s_func_selector.ihevc_chroma_recon_16x16_fptr = &ihevc_chroma_recon_16x16;
ps_codec->s_func_selector.ihevc_memcpy_mul_8_fptr = &ihevc_memcpy_mul_8_a9q;
ps_codec->s_func_selector.ihevc_memcpy_fptr = &ihevc_memcpy_a9q;
ps_codec->s_func_selector.ihevc_memset_mul_8_fptr = &ihevc_memset_mul_8_a9q;
ps_codec->s_func_selector.ihevc_memset_fptr = &ihevc_memset_a9q;
ps_codec->s_func_selector.ihevc_memset_16bit_mul_8_fptr = &ihevc_memset_16bit_mul_8_a9q;
ps_codec->s_func_selector.ihevc_memset_16bit_fptr = &ihevc_memset_16bit_a9q;
ps_codec->s_func_selector.ihevc_pad_left_luma_fptr = &ihevc_pad_left_luma_a9q;
ps_codec->s_func_selector.ihevc_pad_left_chroma_fptr = &ihevc_pad_left_chroma_a9q;
ps_codec->s_func_selector.ihevc_pad_right_luma_fptr = &ihevc_pad_right_luma_a9q;
ps_codec->s_func_selector.ihevc_pad_right_chroma_fptr = &ihevc_pad_right_chroma_a9q;
ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr = &ihevc_weighted_pred_bi_a9q;
ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr = &ihevc_weighted_pred_bi_default_a9q;
ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr = &ihevc_weighted_pred_uni_a9q;
ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = &ihevc_weighted_pred_chroma_bi_neonintr;
ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = &ihevc_weighted_pred_chroma_bi_default_neonintr;
ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = &ihevc_weighted_pred_chroma_uni_neonintr;
ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr = &ihevc_sao_band_offset_luma_a9q;
ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr = &ihevc_sao_band_offset_chroma_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_fptr = &ihevc_sao_edge_offset_class0_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = &ihevc_sao_edge_offset_class0_chroma_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_fptr = &ihevc_sao_edge_offset_class1_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = &ihevc_sao_edge_offset_class1_chroma_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_fptr = &ihevc_sao_edge_offset_class2_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = &ihevc_sao_edge_offset_class2_chroma_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_fptr = &ihevc_sao_edge_offset_class3_a9q;
ps_codec->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = &ihevc_sao_edge_offset_class3_chroma_a9q;
ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr = &ihevcd_fmt_conv_420sp_to_rgba8888_a9q;
ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr = &ihevcd_fmt_conv_420sp_to_rgb565;
ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr = &ihevcd_fmt_conv_420sp_to_420sp;
ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr = &ihevcd_fmt_conv_420sp_to_420p_a9q;
ps_codec->s_func_selector.ihevcd_itrans_recon_dc_luma_fptr = &ihevcd_itrans_recon_dc_luma_a9q;
ps_codec->s_func_selector.ihevcd_itrans_recon_dc_chroma_fptr = &ihevcd_itrans_recon_dc_chroma_a9q;
}

Some files were not shown because too many files have changed in this diff Show more