Encoder: SVC encoding support added

Added support for encoding 'Scalable Baseline' profile, corresponding to profile_idc of 83 in 'Rec. ITU-T H.264 (11/2007)'. Bug: 248891908 Test: svcenc -c enc.cfg Change-Id: Ib12ca4c4a8c0e674738ae2af01558a08cefe0929
2026-04-02 20:30:48 +07:00 · 2023-01-30 14:16:47 +05:30 · 2023-01-30 14:16:47 +05:30 · bb0f31cb6b
commit bb0f31cb6b
parent da77ac1a5f
132 changed files with 85450 additions and 154 deletions
--- a/Android.bp
+++ b/Android.bp
@ -35,6 +35,17 @@ cc_library_headers {
    min_sdk_version: "29",
 }

+cc_library_headers {
+    name: "libsvcenc_headers",
+    export_include_dirs: [
+        "common",
+        "common/svc",
+        "encoder",
+        "encoder/svc"
+    ],
+    min_sdk_version: "29",
+}
+
 cc_library_headers {
    name: "libavcenc_headers",
    export_include_dirs: [
@ -44,6 +55,106 @@ cc_library_headers {
    min_sdk_version: "29",
 }

+cc_defaults {
+    name: "libavc_enc_defaults",
+    vendor_available: true,
+    host_supported: true,
+    shared_libs: [
+        "liblog",
+        "libcutils",
+    ],
+    cflags: [
+        "-DNDEBUG",
+        "-UHP_PL",
+        "-DN_MB_ENABLE",
+        "-fPIC",
+        "-O3",
+        "-Wall",
+        "-Werror",
+        "-Wno-error=constant-conversion",
+    ],
+    arch: {
+        arm: {
+            local_include_dirs: [
+                "common/arm",
+                "encoder/arm",
+            ],
+
+            cflags: [
+                "-DARM",
+                // These will be overridden by armv7_a_neon
+                "-DDISABLE_NEON",
+            ],
+
+            neon: {
+                cflags: [
+                    "-UDISABLE_NEON",
+                ],
+            },
+        },
+
+        arm64: {
+            cflags: [
+                "-DARMV8",
+                "-DARM",
+            ],
+            local_include_dirs: [
+                "common/arm",
+                "common/armv8",
+                "encoder/arm",
+                "encoder/armv8",
+            ],
+        },
+
+        riscv64: {
+            local_include_dirs: [
+                "common/riscv",
+                "encoder/riscv",
+            ],
+        },
+
+        x86: {
+            cflags: [
+                "-DX86",
+                "-msse4.2",
+            ],
+
+            local_include_dirs: [
+                "encoder/x86",
+                "common/x86",
+            ],
+        },
+
+        x86_64: {
+            cflags: [
+                "-DX86",
+                "-msse4.2",
+            ],
+
+            local_include_dirs: [
+                "encoder/x86",
+                "common/x86",
+            ],
+        },
+    },
+
+    sanitize: {
+        integer_overflow: true,
+        misc_undefined: ["bounds"],
+        cfi: true,
+        config: {
+            cfi_assembly_support: true,
+        },
+        blocklist: "libavc_blocklist.txt",
+    },
+
+    apex_available: [
+        "//apex_available:platform", //due to libstagefright_soft_avcenc
+        "com.android.media.swcodec",
+    ],
+    min_sdk_version: "29",
+}
+
 cc_defaults {
    name: "libavc_mvc_dec_defaults",
    cflags: [
@ -349,24 +460,7 @@ cc_library_static {

 cc_library_static {
    name: "libavcenc",
-    vendor_available: true,
-    host_supported: true,
-    shared_libs: [
-        "liblog",
-        "libcutils",
-    ],
-
-    cflags: [
-        "-DNDEBUG",
-        "-UHP_PL",
-        "-DN_MB_ENABLE",
-        "-fPIC",
-
-        "-O3",
-        "-Wall",
-        "-Werror",
-        "-Wno-error=constant-conversion",
-    ],
+    defaults: ["libavc_enc_defaults"],

    export_include_dirs: [
        "common",
@ -435,23 +529,11 @@ cc_library_static {

    arch: {
        arm: {
-            local_include_dirs: [
-                "encoder/arm",
-                "common/arm",
-            ],
-
            srcs: [
                "encoder/arm/ih264e_function_selector.c",
                "common/arm/ih264_arm_memory_barrier.s",
            ],

-            cflags: [
-                "-DARM",
-
-                // This will be overriden by armv7_a_neon
-                "-DDISABLE_NEON",
-            ],
-
            neon: {
                srcs: [
                    "encoder/arm/ih264e_function_selector_a9q.c",
@ -479,25 +561,10 @@ cc_library_static {
                    "encoder/arm/ih264e_fmt_conv.s",
                    "encoder/arm/ime_distortion_metrics_a9q.s",
                ],
-
-                cflags: [
-                    "-UDISABLE_NEON",
-                ],
            },
        },

        arm64: {
-            cflags: [
-                "-DARMV8",
-                "-DARM",
-            ],
-
-            local_include_dirs: [
-                "encoder/arm",
-                "encoder/armv8",
-                "common/armv8",
-            ],
-
            srcs: [
                "encoder/arm/ih264e_function_selector.c",
                "encoder/arm/ih264e_function_selector_av8.c",
@ -525,27 +592,12 @@ cc_library_static {
        },

        riscv64: {
-            local_include_dirs: [
-                "common/riscv",
-                "encoder/riscv",
-            ],
-
            srcs: [
                "encoder/riscv/ih264e_function_selector.c",
            ],
        },

        x86: {
-            cflags: [
-                "-DX86",
-                "-msse4.2",
-            ],
-
-            local_include_dirs: [
-                "encoder/x86",
-                "common/x86",
-            ],
-
            srcs: [
                "encoder/x86/ih264e_function_selector.c",
                "encoder/x86/ih264e_function_selector_sse42.c",
@ -571,16 +623,6 @@ cc_library_static {
        },

        x86_64: {
-            cflags: [
-                "-DX86",
-                "-msse4.2",
-            ],
-
-            local_include_dirs: [
-                "encoder/x86",
-                "common/x86",
-            ],
-
            srcs: [
                "encoder/x86/ih264e_function_selector.c",
                "encoder/x86/ih264e_function_selector_sse42.c",
@ -605,21 +647,161 @@ cc_library_static {
            ],
        },
    },
+}

-    sanitize: {
-        integer_overflow: true,
-        misc_undefined: ["bounds"],
-        cfi: true,
-        config: {
-            cfi_assembly_support: true,
-        },
-        blocklist: "libavc_blocklist.txt",
-    },
-    apex_available: [
-        "//apex_available:platform", //due to libstagefright_soft_avcenc
-        "com.android.media.swcodec",
+cc_library_static {
+    name: "libsvcenc",
+    defaults: ["libavc_enc_defaults"],
+    whole_static_libs: [
+        "libavcenc",
    ],
-    min_sdk_version: "29",
+
+    export_include_dirs: [
+        "common",
+        "common/svc",
+        "encoder",
+        "encoder/svc",
+    ],
+
+    srcs: [
+        "common/svc/isvc_cabac_tables.c",
+        "common/svc/isvc_common_tables.c",
+        "common/svc/isvc_intra_resample.c",
+        "common/svc/isvc_iquant_itrans_recon.c",
+        "common/svc/isvc_mem_fns.c",
+        "common/svc/isvc_resi_trans_quant.c",
+        "encoder/svc/irc_svc_rate_control_api.c",
+        "encoder/svc/isvce_api.c",
+        "encoder/svc/isvce_cabac.c",
+        "encoder/svc/isvce_cabac_encode.c",
+        "encoder/svc/isvce_cabac_init.c",
+        "encoder/svc/isvce_cavlc.c",
+        "encoder/svc/isvce_core_coding.c",
+        "encoder/svc/isvce_deblk.c",
+        "encoder/svc/isvce_downscaler.c",
+        "encoder/svc/isvce_encode.c",
+        "encoder/svc/isvce_encode_header.c",
+        "encoder/svc/isvce_fmt_conv.c",
+        "encoder/svc/isvce_function_selector_generic.c",
+        "encoder/svc/isvce_globals.c",
+        "encoder/svc/isvce_ibl_eval.c",
+        "encoder/svc/isvce_ilp_mv.c",
+        "encoder/svc/isvce_intra_modes_eval.c",
+        "encoder/svc/isvce_mc.c",
+        "encoder/svc/isvce_me.c",
+        "encoder/svc/isvce_mode_stat_visualiser.c",
+        "encoder/svc/isvce_nalu_stat_aggregator.c",
+        "encoder/svc/isvce_process.c",
+        "encoder/svc/isvce_rate_control.c",
+        "encoder/svc/isvce_rc_mem_interface.c",
+        "encoder/svc/isvce_rc_utils.c",
+        "encoder/svc/isvce_residual_pred.c",
+        "encoder/svc/isvce_sub_pic_rc.c",
+        "encoder/svc/isvce_utils.c",
+    ],
+
+    arch: {
+        arm: {
+            local_include_dirs: [
+                "common/arm/svc",
+                "encoder/arm/svc",
+            ],
+
+            srcs: [
+                "encoder/arm/svc/isvce_function_selector.c",
+            ],
+
+            neon: {
+                srcs: [
+                    "encoder/arm/svc/isvce_function_selector_a9q.c",
+                    "common/arm/svc/isvc_intra_sampling_neon.c",
+                    "common/arm/svc/isvc_iquant_itrans_recon_neon.c",
+                    "common/arm/svc/isvc_mem_fns_neon.c",
+                    "common/arm/svc/isvc_resi_trans_quant_neon.c",
+                    "encoder/arm/svc/isvce_downscaler_neon.c",
+                    "encoder/arm/svc/isvce_rc_utils_neon.c",
+                    "encoder/arm/svc/isvce_residual_pred_neon.c",
+                ],
+            },
+        },
+
+        arm64: {
+            local_include_dirs: [
+                "common/arm/svc",
+                "encoder/arm/svc",
+            ],
+
+            srcs: [
+                "encoder/arm/svc/isvce_function_selector.c",
+                "encoder/arm/svc/isvce_function_selector_av8.c",
+                "common/arm/svc/isvc_intra_sampling_neon.c",
+                "common/arm/svc/isvc_iquant_itrans_recon_neon.c",
+                "common/arm/svc/isvc_mem_fns_neon.c",
+                "common/arm/svc/isvc_resi_trans_quant_neon.c",
+                "encoder/arm/svc/isvce_downscaler_neon.c",
+                "encoder/arm/svc/isvce_rc_utils_neon.c",
+                "encoder/arm/svc/isvce_residual_pred_neon.c",
+            ],
+        },
+
+        riscv64: {
+            local_include_dirs: [
+                "encoder/riscv/svc",
+            ],
+
+            srcs: [
+                "encoder/riscv/svc/isvce_function_selector.c",
+            ],
+        },
+
+        x86: {
+            local_include_dirs: [
+                "encoder/x86/svc",
+                "common/x86/svc",
+            ],
+
+            srcs: [
+                "common/x86/svc/isvc_intra_resample_sse42.c",
+                "common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c",
+                "common/x86/svc/isvc_iquant_itrans_recon_sse42.c",
+                "common/x86/svc/isvc_iquant_itrans_recon_ssse3.c",
+                "common/x86/svc/isvc_mem_fns_sse42.c",
+                "common/x86/svc/isvc_mem_fns_ssse3.c",
+                "common/x86/svc/isvc_padding_ssse3.c",
+                "common/x86/svc/isvc_resi_trans_quant_sse42.c",
+                "encoder/x86/svc/isvce_downscaler_sse42.c",
+                "encoder/x86/svc/isvce_function_selector.c",
+                "encoder/x86/svc/isvce_function_selector_sse42.c",
+                "encoder/x86/svc/isvce_function_selector_ssse3.c",
+                "encoder/x86/svc/isvce_rc_utils_sse42.c",
+                "encoder/x86/svc/isvce_residual_pred_sse42.c",
+            ],
+        },
+
+        x86_64: {
+            local_include_dirs: [
+                "encoder/x86/svc",
+                "common/x86/svc",
+            ],
+
+            srcs: [
+                "common/x86/svc/isvc_intra_resample_sse42.c",
+                "common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c",
+                "common/x86/svc/isvc_iquant_itrans_recon_sse42.c",
+                "common/x86/svc/isvc_iquant_itrans_recon_ssse3.c",
+                "common/x86/svc/isvc_mem_fns_sse42.c",
+                "common/x86/svc/isvc_mem_fns_ssse3.c",
+                "common/x86/svc/isvc_padding_ssse3.c",
+                "common/x86/svc/isvc_resi_trans_quant_sse42.c",
+                "encoder/x86/svc/isvce_downscaler_sse42.c",
+                "encoder/x86/svc/isvce_function_selector.c",
+                "encoder/x86/svc/isvce_function_selector_sse42.c",
+                "encoder/x86/svc/isvce_function_selector_ssse3.c",
+                "encoder/x86/svc/isvce_rc_utils_sse42.c",
+                "encoder/x86/svc/isvce_residual_pred_sse42.c",
+            ],
+        },
+    },
 }

 subdirs = ["test"]
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -4,6 +4,8 @@ enable_language(ASM)

 set(AVC_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
 set(AVC_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+option(ENABLE_MVC "Enables mvcdec builds" OFF)
+option(ENABLE_SVC "Enables svcenc and svcdec builds" OFF)

 if("${AVC_ROOT}" STREQUAL "${AVC_CONFIG_DIR}")
  message(
@ -36,13 +38,29 @@ libavc_set_link_libraries()

 include("${AVC_ROOT}/common/common.cmake")
 include("${AVC_ROOT}/decoder/libavcdec.cmake")
-include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake")
+if (${ENABLE_MVC})
+  include("${AVC_ROOT}/decoder/mvc/libmvcdec.cmake")
+endif()
 include("${AVC_ROOT}/encoder/libavcenc.cmake")
+if (${ENABLE_SVC})
+  include("${AVC_ROOT}/common/svccommon.cmake")
+  include("${AVC_ROOT}/encoder/svc/libsvcenc.cmake")
+endif()

 include("${AVC_ROOT}/test/decoder/avcdec.cmake")
-include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake")
+if (${ENABLE_MVC})
+  include("${AVC_ROOT}/test/mvcdec/mvcdec.cmake")
+endif()
 include("${AVC_ROOT}/test/encoder/avcenc.cmake")
+if (${ENABLE_SVC})
+  include("${AVC_ROOT}/test/svcenc/svcenc.cmake")
+endif()

 include("${AVC_ROOT}/fuzzer/avc_dec_fuzzer.cmake")
-include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake")
+if (${ENABLE_MVC})
+  include("${AVC_ROOT}/fuzzer/mvc_dec_fuzzer.cmake")
+endif()
 include("${AVC_ROOT}/fuzzer/avc_enc_fuzzer.cmake")
+if (${ENABLE_SVC})
+  include("${AVC_ROOT}/fuzzer/svc_enc_fuzzer.cmake")
+endif()
--- a/common/arm/svc/isvc_intra_sampling_neon.c
+++ b/common/arm/svc/isvc_intra_sampling_neon.c
@ -0,0 +1,485 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ * *******************************************************************************
+ * * @file
+ *  isvc_intra_sampling_neon.c
+ *
+ * @brief
+ *  neon variants of intra sampling functions used by IBL mode
+ *
+ * *******************************************************************************
+ */
+
+#include <arm_neon.h>
+#include <string.h>
+
+#include "ih264_typedefs.h"
+#include "isvc_intra_resample.h"
+
+void isvc_interpolate_base_luma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                            UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
+{
+    WORD32 i4_y;
+    WORD16 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    WORD32 i4_filt_stride, i4_src_stride;
+    UWORD8 *pu1_inp = pu1_inp_buf;
+    UWORD8 *pu1_out = pu1_out_buf;
+    WORD16 *pi2_tmp = pi2_tmp_filt_buf;
+
+    int16x4_t i4_rslt_vert_16x4_1, i4_rslt_vert_16x4_2;
+    uint8x8_t i4_samp_vert_8x8_0, i4_samp_vert_8x8_1, i4_samp_vert_8x8_2, i4_samp_vert_8x8_3;
+    int16x8_t i4_rslt_vert_16x8_0, i4_rslt_vert_16x8_2;
+
+    /* Registers used by the horizontal interpolation pass */
+    int32x4_t i4_rslt_horz_r0_1, i4_rslt_horz_r1_1, i4_rslt_horz_r0_2, i4_rslt_horz_r1_2;
+    uint16x4_t i4_rslt_horz_r0_1_tmp, i4_rslt_horz_r1_1_tmp, i4_rslt_horz_r0_2_tmp,
+        i4_rslt_horz_r1_2_tmp;
+    uint16x8_t rslt_16x8_t_1, rslt_16x8_t_2;
+
+    int16x4_t i4_samp_horz_16x4_0, i4_samp_horz_16x4_1, i4_samp_horz_16x4_2, i4_samp_horz_16x4_3,
+        i4_samp_horz_16x4_4;
+    int16x4_t i4_samp_horz_16x4_5, i4_samp_horz_16x4_6, i4_samp_horz_16x4_7, i4_samp_horz_16x4_8;
+    int16_t i4_coeff_c0 = -3;
+    int16_t i4_coeff_c1 = 28;
+    int16_t i4_coeff_c2 = 8;
+    int16_t i4_coeff_c3 = -1;
+    int32x4x2_t i4_rslt_horz_r0_tmp32, i4_rslt_horz_r1_tmp32;
+    int32x4_t const_512_32x4 = vdupq_n_s32(512);
+
+    /* Filter coefficient values for phase 4 */
+    i4_coeff_0 = -3;
+    i4_coeff_1 = 28;
+    i4_coeff_2 = 8;
+    i4_coeff_3 = -1;
+
+    i4_filt_stride = 12;
+    i4_src_stride = DYADIC_REF_W_Y;
+
+    /* Vertical interpolation */
+    {
+        /* First 64 bits: columns 0-7 of each row */
+        i4_samp_vert_8x8_0 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+        i4_samp_vert_8x8_1 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+        i4_samp_vert_8x8_2 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+        i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+
+        i4_rslt_vert_16x8_0 =
+            vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_3);
+        i4_rslt_vert_16x8_0 = vmlaq_n_s16(
+            i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_2);
+        i4_rslt_vert_16x8_0 = vmlaq_n_s16(
+            i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_1);
+        i4_rslt_vert_16x8_0 = vmlaq_n_s16(
+            i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_0);
+
+        vst1q_s16(pi2_tmp, i4_rslt_vert_16x8_0);
+        pi2_tmp += i4_filt_stride;
+
+        for(i4_y = 1; i4_y < 15; i4_y += 2)
+        {
+            i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
+            i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
+            i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
+            i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
+
+            i4_rslt_vert_16x8_0 =
+                vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_0);
+            i4_rslt_vert_16x8_0 =
+                vmlaq_n_s16(i4_rslt_vert_16x8_0,
+                            vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_1);
+            i4_rslt_vert_16x8_0 =
+                vmlaq_n_s16(i4_rslt_vert_16x8_0,
+                            vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_2);
+            i4_rslt_vert_16x8_0 =
+                vmlaq_n_s16(i4_rslt_vert_16x8_0,
+                            vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_3);
+
+            i4_rslt_vert_16x8_2 =
+                vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_3);
+            i4_rslt_vert_16x8_2 =
+                vmlaq_n_s16(i4_rslt_vert_16x8_2,
+                            vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_2);
+            i4_rslt_vert_16x8_2 =
+                vmlaq_n_s16(i4_rslt_vert_16x8_2,
+                            vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_1);
+            i4_rslt_vert_16x8_2 =
+                vmlaq_n_s16(i4_rslt_vert_16x8_2,
+                            vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_0);
+
+            vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_0));
+            pi2_tmp += i4_filt_stride;
+            vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_2));
+            pi2_tmp += i4_filt_stride;
+            pu1_inp += i4_src_stride;
+        }
+
+        /* y = 15, y_phase = 4 */
+        i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
+        i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
+        i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
+        i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
+
+        i4_rslt_vert_16x8_0 =
+            vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_0)), i4_coeff_0);
+        i4_rslt_vert_16x8_0 = vmlaq_n_s16(
+            i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_1)), i4_coeff_1);
+        i4_rslt_vert_16x8_0 = vmlaq_n_s16(
+            i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_2)), i4_coeff_2);
+        i4_rslt_vert_16x8_0 = vmlaq_n_s16(
+            i4_rslt_vert_16x8_0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_3)), i4_coeff_3);
+
+        vst1q_s16(pi2_tmp, (i4_rslt_vert_16x8_0));
+    }
+
+    {
+        /* Remaining 32 bits: columns 8-11 (only the low 4 lanes of each load are used) */
+        pu1_inp = pu1_inp_buf + 8;
+        pi2_tmp = pi2_tmp_filt_buf + 8;
+
+        i4_samp_vert_8x8_0 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+        i4_samp_vert_8x8_1 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+        i4_samp_vert_8x8_2 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+        i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
+        pu1_inp += i4_src_stride;
+
+        i4_rslt_vert_16x4_1 = vmul_n_s16(
+            vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_3);
+        i4_rslt_vert_16x4_1 = vmla_n_s16(
+            i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))),
+            i4_coeff_2);
+        i4_rslt_vert_16x4_1 = vmla_n_s16(
+            i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))),
+            i4_coeff_1);
+        i4_rslt_vert_16x4_1 = vmla_n_s16(
+            i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))),
+            i4_coeff_0);
+
+        vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1));
+        pi2_tmp += i4_filt_stride;
+
+        for(i4_y = 1; i4_y < 15; i4_y += 2)
+        {
+            i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
+            i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
+            i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
+            i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
+
+            i4_rslt_vert_16x4_1 = vmul_n_s16(
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_0);
+            i4_rslt_vert_16x4_1 = vmla_n_s16(
+                i4_rslt_vert_16x4_1,
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), i4_coeff_1);
+            i4_rslt_vert_16x4_1 = vmla_n_s16(
+                i4_rslt_vert_16x4_1,
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), i4_coeff_2);
+            i4_rslt_vert_16x4_1 = vmla_n_s16(
+                i4_rslt_vert_16x4_1,
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), i4_coeff_3);
+
+            i4_rslt_vert_16x4_2 = vmul_n_s16(
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_3);
+            i4_rslt_vert_16x4_2 = vmla_n_s16(
+                i4_rslt_vert_16x4_2,
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))), i4_coeff_2);
+            i4_rslt_vert_16x4_2 = vmla_n_s16(
+                i4_rslt_vert_16x4_2,
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))), i4_coeff_1);
+            i4_rslt_vert_16x4_2 = vmla_n_s16(
+                i4_rslt_vert_16x4_2,
+                vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))), i4_coeff_0);
+
+            vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1));
+            pi2_tmp += i4_filt_stride;
+            vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_2));
+            pi2_tmp += i4_filt_stride;
+            pu1_inp += i4_src_stride;
+        }
+
+        i4_samp_vert_8x8_0 = i4_samp_vert_8x8_1;
+        i4_samp_vert_8x8_1 = i4_samp_vert_8x8_2;
+        i4_samp_vert_8x8_2 = i4_samp_vert_8x8_3;
+        i4_samp_vert_8x8_3 = vld1_u8((const UWORD8 *) pu1_inp);
+
+        i4_rslt_vert_16x4_1 = vmul_n_s16(
+            vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_0))), i4_coeff_0);
+        i4_rslt_vert_16x4_1 = vmla_n_s16(
+            i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_1))),
+            i4_coeff_1);
+        i4_rslt_vert_16x4_1 = vmla_n_s16(
+            i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_2))),
+            i4_coeff_2);
+        i4_rslt_vert_16x4_1 = vmla_n_s16(
+            i4_rslt_vert_16x4_1, vreinterpret_s16_u16(vget_low_u16(vmovl_u8(i4_samp_vert_8x8_3))),
+            i4_coeff_3);
+
+        vst1_s16(pi2_tmp, (i4_rslt_vert_16x4_1));
+        /* Reinitializing the ptrs */
+        pu1_inp = pu1_inp_buf;
+        pi2_tmp = pi2_tmp_filt_buf;
+    }
+
+    /* Horizontal interpolation */
+    for(i4_y = 0; i4_y < 16; i4_y++)
+    {
+        i4_samp_horz_16x4_0 = vld1_s16(pi2_tmp);
+        i4_samp_horz_16x4_1 = vld1_s16(pi2_tmp + 1);
+        i4_samp_horz_16x4_2 = vld1_s16(pi2_tmp + 2);
+        i4_samp_horz_16x4_3 = vld1_s16(pi2_tmp + 3);
+        i4_samp_horz_16x4_4 = vld1_s16(pi2_tmp + 4);
+        i4_samp_horz_16x4_5 = vld1_s16(pi2_tmp + 5);
+        i4_samp_horz_16x4_6 = vld1_s16(pi2_tmp + 6);
+        i4_samp_horz_16x4_7 = vld1_s16(pi2_tmp + 7);
+        i4_samp_horz_16x4_8 = vld1_s16(pi2_tmp + 8);
+
+        i4_rslt_horz_r0_1 =
+            vmull_n_s16(i4_samp_horz_16x4_0, i4_coeff_c3); /* a0c3 a1c3  a2c3  a3c3 */
+        i4_rslt_horz_r0_1 =
+            vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_1,
+                        i4_coeff_c2); /* a0c3+a1c2 a1c3+a2c2  a2c3+a3c2  a3c3+a4c2 */
+        i4_rslt_horz_r0_1 = vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_2, i4_coeff_c1);
+        i4_rslt_horz_r0_1 = vmlal_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x4_3, i4_coeff_c0);
+        /* i4_rslt_horz_r0_1 : contains res at even pos:0,2,4,6 */
+
+        i4_rslt_horz_r1_1 =
+            vmull_n_s16(i4_samp_horz_16x4_1, i4_coeff_c0); /* a1c0 a2c0  a3c0  a4c0 */
+        i4_rslt_horz_r1_1 =
+            vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_2,
+                        i4_coeff_c1); /* a1c0+a2c1 a2c0+a3c1  a3c0+a4c1  a4c0+a5c1 */
+        i4_rslt_horz_r1_1 = vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_3, i4_coeff_c2);
+        i4_rslt_horz_r1_1 = vmlal_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x4_4, i4_coeff_c3);
+        /* i4_rslt_horz_r1_1 : contains res at odd pos:1,3,5,7 */
+
+        i4_rslt_horz_r0_2 =
+            vmull_n_s16(i4_samp_horz_16x4_4, i4_coeff_c3); /* a4c3 a5c3  a6c3  a7c3 */
+        i4_rslt_horz_r0_2 =
+            vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_5,
+                        i4_coeff_c2); /* a4c3+a5c2 a5c3+a6c2  a6c3+a7c2  a7c3+a8c2 */
+        i4_rslt_horz_r0_2 = vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_6, i4_coeff_c1);
+        i4_rslt_horz_r0_2 = vmlal_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x4_7, i4_coeff_c0);
+        /* i4_rslt_horz_r0_2 : contains res at even pos:8,10,12,14 */
+
+        i4_rslt_horz_r1_2 =
+            vmull_n_s16(i4_samp_horz_16x4_5, i4_coeff_c0); /* a5c0 a6c0  a7c0  a8c0 */
+        i4_rslt_horz_r1_2 =
+            vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_6,
+                        i4_coeff_c1); /* a5c0+a6c1 a6c0+a7c1  a7c0+a8c1  a8c0+a9c1 */
+        i4_rslt_horz_r1_2 = vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_7, i4_coeff_c2);
+        i4_rslt_horz_r1_2 = vmlal_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x4_8, i4_coeff_c3);
+        /* i4_rslt_horz_r1_2 : contains res at odd pos:9,11,13,15 */
+
+        i4_rslt_horz_r0_tmp32 = vzipq_s32(i4_rslt_horz_r0_1, i4_rslt_horz_r1_1);
+        i4_rslt_horz_r1_tmp32 = vzipq_s32(i4_rslt_horz_r0_2, i4_rslt_horz_r1_2);
+
+        i4_rslt_horz_r0_1 = vaddq_s32(i4_rslt_horz_r0_tmp32.val[0], const_512_32x4);
+        i4_rslt_horz_r1_1 = vaddq_s32(i4_rslt_horz_r0_tmp32.val[1], const_512_32x4);
+        i4_rslt_horz_r0_2 = vaddq_s32(i4_rslt_horz_r1_tmp32.val[0], const_512_32x4);
+        i4_rslt_horz_r1_2 = vaddq_s32(i4_rslt_horz_r1_tmp32.val[1], const_512_32x4);
+
+        i4_rslt_horz_r0_1_tmp = vqshrun_n_s32(i4_rslt_horz_r0_1, 10);
+        i4_rslt_horz_r1_1_tmp = vqshrun_n_s32(i4_rslt_horz_r1_1, 10);
+
+        i4_rslt_horz_r0_2_tmp = vqshrun_n_s32(i4_rslt_horz_r0_2, 10);
+        i4_rslt_horz_r1_2_tmp = vqshrun_n_s32(i4_rslt_horz_r1_2, 10);
+
+        rslt_16x8_t_1 = vcombine_u16(i4_rslt_horz_r0_1_tmp, i4_rslt_horz_r1_1_tmp);
+        rslt_16x8_t_2 = vcombine_u16(i4_rslt_horz_r0_2_tmp, i4_rslt_horz_r1_2_tmp);
+
+        vst1_u8(pu1_out, vqmovn_u16(rslt_16x8_t_1));
+        vst1_u8(pu1_out + 8, vqmovn_u16(rslt_16x8_t_2));
+
+        pu1_out += i4_out_stride;
+        pi2_tmp += i4_filt_stride;
+    }
+}
+
+void isvc_horz_interpol_chroma_dyadic_neon(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
+                                           WORD32 i4_out_stride, WORD32 i4_phase_0,
+                                           WORD32 i4_phase_1)
+{
+    WORD32 i4_y;
+    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    UWORD8 *pu1_out = pu1_out_buf;
+    WORD16 *pi2_tmp = pi2_tmp_filt_buf;
+    WORD32 i4_filt_stride = 6;
+    WORD32 i4_dst_stride = i4_out_stride;
+
+    int16x8_t i4_samp_horz_16x8_r0_0, i4_samp_horz_16x8_r0_1, i4_samp_horz_16x8_r0_2;
+    int16x8_t i4_samp_horz_16x8_r1_0, i4_samp_horz_16x8_r1_1, i4_samp_horz_16x8_r1_2;
+    int16x8_t i4_rslt_horz_r0_1, i4_rslt_horz_r0_2;
+    int16x8_t i4_rslt_horz_r1_1, i4_rslt_horz_r1_2;
+
+    int16x8x2_t temp_horz_16x8_r0;
+    int16x8x2_t temp_horz_16x8_r1;
+    int16x8_t final_horz_16x8_r0_1;
+    int16x8_t final_horz_16x8_r1_1;
+
+    uint8x16_t i4_out_horz_8x16_r0, i4_out_horz_8x16_r1;
+    uint8x16_t chroma_mask_8x16 = vreinterpretq_u8_u16(vdupq_n_u16(0x00ff));
+
+    i4_coeff_0 = 16 - i4_phase_0;
+    i4_coeff_1 = i4_phase_0;
+    i4_coeff_2 = 16 - i4_phase_1;
+    i4_coeff_3 = i4_phase_1;
+
+    /* Horizontal interpolation: two rows (r0, r1) of the filtered buffer per iteration */
+    for(i4_y = 0; i4_y < 8; i4_y += 2)
+    {
+        i4_samp_horz_16x8_r0_0 = vld1q_s16(pi2_tmp);     /* a0 a1 a2 a3 a4 a5 a6 a7 */
+        i4_samp_horz_16x8_r0_1 = vld1q_s16(pi2_tmp + 1); /* a1 a2 a3 a4 a5 a6 a7 a8 */
+        i4_samp_horz_16x8_r0_2 = vld1q_s16(pi2_tmp + 2); /* a2 a3 a4 a5 a6 a7 a8 a9 */
+
+        i4_samp_horz_16x8_r1_0 = vld1q_s16(pi2_tmp + i4_filt_stride);
+        i4_samp_horz_16x8_r1_1 = vld1q_s16(pi2_tmp + i4_filt_stride + 1);
+        i4_samp_horz_16x8_r1_2 = vld1q_s16(pi2_tmp + (i4_filt_stride + 2));
+
+        i4_rslt_horz_r0_1 =
+            vmulq_n_s16(i4_samp_horz_16x8_r0_0, i4_coeff_0); /* a0c0 a1c0  a2c0  a3c0 */
+        i4_rslt_horz_r0_2 =
+            vmulq_n_s16(i4_samp_horz_16x8_r0_1, i4_coeff_2); /* a1c2 a2c2  a3c2 a4c2 */
+
+        i4_rslt_horz_r0_1 = vmlaq_n_s16(i4_rslt_horz_r0_1, i4_samp_horz_16x8_r0_1,
+                                        i4_coeff_1); /* a0c0+a1c1 a1c0+a2c1  a2c0+a3c1  a3c0+a4c1 */
+        i4_rslt_horz_r0_2 = vmlaq_n_s16(i4_rslt_horz_r0_2, i4_samp_horz_16x8_r0_2,
+                                        i4_coeff_3); /* a1c2+a2c3  a2c2+a3c3 a3c2+a4c3 a4c2+a5c3 */
+
+        i4_rslt_horz_r1_1 = vmulq_n_s16(i4_samp_horz_16x8_r1_0, i4_coeff_0);
+        i4_rslt_horz_r1_2 = vmulq_n_s16(i4_samp_horz_16x8_r1_1, i4_coeff_2);
+
+        i4_rslt_horz_r1_1 = vmlaq_n_s16(i4_rslt_horz_r1_1, i4_samp_horz_16x8_r1_1, i4_coeff_1);
+        i4_rslt_horz_r1_2 = vmlaq_n_s16(i4_rslt_horz_r1_2, i4_samp_horz_16x8_r1_2, i4_coeff_3);
+
+        temp_horz_16x8_r0 = vzipq_s16(i4_rslt_horz_r0_1, i4_rslt_horz_r0_2);
+        temp_horz_16x8_r1 = vzipq_s16(i4_rslt_horz_r1_1, i4_rslt_horz_r1_2);
+
+        final_horz_16x8_r0_1 = temp_horz_16x8_r0.val[0];
+        final_horz_16x8_r1_1 = temp_horz_16x8_r1.val[0];
+
+        final_horz_16x8_r0_1 = vrshrq_n_s16(final_horz_16x8_r0_1, 8);
+        final_horz_16x8_r1_1 = vrshrq_n_s16(final_horz_16x8_r1_1, 8);
+
+        i4_out_horz_8x16_r0 = vld1q_u8(pu1_out);
+        i4_out_horz_8x16_r1 = vld1q_u8(pu1_out + i4_dst_stride);
+
+        i4_out_horz_8x16_r0 = vbslq_u8(chroma_mask_8x16, vreinterpretq_u8_s16(final_horz_16x8_r0_1),
+                                       i4_out_horz_8x16_r0);
+        i4_out_horz_8x16_r1 = vbslq_u8(chroma_mask_8x16, vreinterpretq_u8_s16(final_horz_16x8_r1_1),
+                                       i4_out_horz_8x16_r1);
+
+        vst1q_u8(pu1_out, i4_out_horz_8x16_r0);
+        vst1q_u8(pu1_out + i4_dst_stride, i4_out_horz_8x16_r1);
+
+        /* Advance both pointers past the two rows just processed */
+        pi2_tmp += (i4_filt_stride << 1);
+        pu1_out += (i4_dst_stride << 1);
+    }
+}
+
+/**
+ * Vertical pass of the dyadic (2x) chroma interpolation.
+ *
+ * Reads six rows of eight bytes from pu1_inp_buf (row pitch DYADIC_REF_W_C)
+ * and writes eight rows of 16-bit weighted sums to pi2_tmp_filt_buf with a
+ * row pitch of six elements (48 elements in total).
+ *
+ * Each output row is a two-tap blend of two vertically adjacent input rows:
+ *   even output rows use weights (16 - i4_phase_0, i4_phase_0)
+ *   odd  output rows use weights (16 - i4_phase_1, i4_phase_1)
+ * No rounding or normalisation is applied in this function.
+ *
+ * @param pu1_inp_buf      input samples
+ * @param pi2_tmp_filt_buf output buffer for the intermediate 16-bit sums
+ * @param i4_phase_0       weight of the lower input row for even output rows
+ * @param i4_phase_1       weight of the lower input row for odd output rows
+ */
+void isvc_vert_interpol_chroma_dyadic_neon(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                           WORD32 i4_phase_0, WORD32 i4_phase_1)
+{
+    WORD32 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    WORD32 i4_src_stride = DYADIC_REF_W_C;
+    UWORD8 *pu1_inp = pu1_inp_buf;
+    WORD16 *pi2_tmp = pi2_tmp_filt_buf;
+
+    uint8x8_t i4_samp_vert_8x8_r0, i4_samp_vert_8x8_r1, i4_samp_vert_8x8_r2, i4_samp_vert_8x8_r3,
+        i4_samp_vert_8x8_r4, i4_samp_vert_8x8_r5;
+
+    int16x8_t i4_rslt_vert_16x8_r0, i4_rslt_vert_16x8_r1, i4_rslt_vert_16x8_r2,
+        i4_rslt_vert_16x8_r3, i4_rslt_vert_16x8_r4, i4_rslt_vert_16x8_r5, i4_rslt_vert_16x8_r6,
+        i4_rslt_vert_16x8_r7;
+
+    /* Tap pairs: (coeff_0, coeff_1) for even output rows, (coeff_2, coeff_3) for odd ones */
+    i4_coeff_0 = 16 - i4_phase_0;
+    i4_coeff_1 = i4_phase_0;
+    i4_coeff_2 = 16 - i4_phase_1;
+    i4_coeff_3 = i4_phase_1;
+
+    /* Vertical interpolation */
+    /* Load the six input rows r0..r5, eight samples each */
+    i4_samp_vert_8x8_r0 = vld1_u8(pu1_inp);
+    pu1_inp += i4_src_stride;
+    i4_samp_vert_8x8_r1 = vld1_u8(pu1_inp);
+    pu1_inp += i4_src_stride;
+    i4_samp_vert_8x8_r2 = vld1_u8(pu1_inp);
+    pu1_inp += i4_src_stride;
+    i4_samp_vert_8x8_r3 = vld1_u8(pu1_inp);
+    pu1_inp += i4_src_stride;
+    i4_samp_vert_8x8_r4 = vld1_u8(pu1_inp);
+    pu1_inp += i4_src_stride;
+    i4_samp_vert_8x8_r5 = vld1_u8(pu1_inp);
+    pu1_inp += i4_src_stride; /* pu1_inp is not read again after this increment */
+
+    /* Output rows are written in increasing order at a pitch of 6 elements. */
+    /* Each 8-lane store therefore spills two lanes into the start of the    */
+    /* next row; the following store overwrites them, so the store order     */
+    /* below must be preserved.                                              */
+
+    /* out row 0 = r0 * coeff_0 + r1 * coeff_1 */
+    i4_rslt_vert_16x8_r0 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r0)), i4_coeff_0);
+    i4_rslt_vert_16x8_r0 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r0, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_1);
+    vst1q_s16(pi2_tmp, i4_rslt_vert_16x8_r0);
+
+    /* out row 1 = r1 * coeff_2 + r2 * coeff_3 */
+    i4_rslt_vert_16x8_r1 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_2);
+    i4_rslt_vert_16x8_r1 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r1, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_3);
+    vst1q_s16(pi2_tmp + 6, i4_rslt_vert_16x8_r1);
+
+    /* out row 2 = r1 * coeff_0 + r2 * coeff_1 */
+    i4_rslt_vert_16x8_r2 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r1)), i4_coeff_0);
+    i4_rslt_vert_16x8_r2 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r2, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_1);
+    vst1q_s16(pi2_tmp + 12, i4_rslt_vert_16x8_r2);
+
+    /* out row 3 = r2 * coeff_2 + r3 * coeff_3 */
+    i4_rslt_vert_16x8_r3 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_2);
+    i4_rslt_vert_16x8_r3 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r3, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_3);
+    vst1q_s16(pi2_tmp + 18, i4_rslt_vert_16x8_r3);
+
+    /* out row 4 = r2 * coeff_0 + r3 * coeff_1 */
+    i4_rslt_vert_16x8_r4 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r2)), i4_coeff_0);
+    i4_rslt_vert_16x8_r4 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r4, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_1);
+    vst1q_s16(pi2_tmp + 24, i4_rslt_vert_16x8_r4);
+
+    /* out row 5 = r3 * coeff_2 + r4 * coeff_3 */
+    i4_rslt_vert_16x8_r5 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_2);
+    i4_rslt_vert_16x8_r5 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r5, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_3);
+    vst1q_s16(pi2_tmp + 30, i4_rslt_vert_16x8_r5);
+
+    /* out row 6 = r3 * coeff_0 + r4 * coeff_1 */
+    i4_rslt_vert_16x8_r6 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r3)), i4_coeff_0);
+    i4_rslt_vert_16x8_r6 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r6, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_1);
+    vst1q_s16(pi2_tmp + 36, i4_rslt_vert_16x8_r6);
+
+    /* out row 7 = r4 * coeff_2 + r5 * coeff_3 */
+    i4_rslt_vert_16x8_r7 =
+        vmulq_n_s16(vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r4)), i4_coeff_2);
+    i4_rslt_vert_16x8_r7 = vmlaq_n_s16(
+        i4_rslt_vert_16x8_r7, vreinterpretq_s16_u16(vmovl_u8(i4_samp_vert_8x8_r5)), i4_coeff_3);
+    /* Last row: store exactly six elements (42..47) so that nothing is */
+    /* written past element 47 of the temporary buffer                  */
+    vst1_s16(pi2_tmp + 42, vget_low_s16(i4_rslt_vert_16x8_r7));
+    vst1q_lane_s16(pi2_tmp + 46, i4_rslt_vert_16x8_r7, 4);
+    vst1q_lane_s16(pi2_tmp + 47, i4_rslt_vert_16x8_r7, 5);
+}
--- a/common/arm/svc/isvc_iquant_itrans_recon_neon.c
+++ b/common/arm/svc/isvc_iquant_itrans_recon_neon.c
--- a/common/arm/svc/isvc_mem_fns_neon.c
+++ b/common/arm/svc/isvc_mem_fns_neon.c
@ -0,0 +1,151 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ * *******************************************************************************
+ * * @file
+ *  isvc_mem_fns_neon.c
+ *
+ * @brief
+ *  NEON variants of
+ * functions used for memory operations
+ *
+ * *******************************************************************************
+ */
+#include <arm_neon.h>
+#include <string.h>
+
+#include "ih264_typedefs.h"
+#include "isvc_mem_fns.h"
+
+/**
+ * Fills a two-dimensional block of bytes with a constant value.
+ *
+ * Exactly i4_blk_ht rows of i4_blk_wd bytes are written on every path.
+ * Widths of 4, 8 and any multiple of 16 use NEON stores; every other
+ * width falls back to one memset() per row.
+ *
+ * @param pu1_dst       pointer to the top-left byte of the block
+ * @param i4_dst_stride distance in bytes between the starts of two rows
+ * @param u1_val        byte value to write
+ * @param i4_blk_wd     block width in bytes
+ * @param i4_blk_ht     block height in rows
+ */
+void isvc_memset_2d_neon(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
+                         WORD32 i4_blk_ht)
+{
+    WORD32 i;
+
+    if(i4_blk_wd == 4)
+    {
+        const uint32x2_t u4_val_32x2 = vreinterpret_u32_u8(vdup_n_u8(u1_val));
+
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            /* NOTE(review): the store goes through a UWORD32 pointer, as in the */
+            /* original code; confirm callers never rely on a stricter or looser */
+            /* alignment of pu1_dst than this implies                            */
+            vst1_lane_u32((UWORD32 *) pu1_dst, u4_val_32x2, 0);
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else if(i4_blk_wd == 8)
+    {
+        const uint8x8_t u1_val_8x8 = vdup_n_u8(u1_val);
+
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            vst1_u8(pu1_dst, u1_val_8x8);
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else if(i4_blk_wd % 16 == 0)
+    {
+        WORD32 j;
+        const uint8x16_t u1_val_8x16 = vdupq_n_u8(u1_val);
+
+        /* One 16-byte store per 16 columns; the height needs no special alignment */
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            for(j = 0; j < i4_blk_wd; j += 16)
+            {
+                vst1q_u8(pu1_dst + j, u1_val_8x16);
+            }
+
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else
+    {
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            memset(pu1_dst, u1_val, i4_blk_wd);
+            pu1_dst += i4_dst_stride;
+        }
+    }
+}
--- a/common/arm/svc/isvc_resi_trans_quant_neon.c
+++ b/common/arm/svc/isvc_resi_trans_quant_neon.c
--- a/common/ih264_cabac_tables.h
+++ b/common/ih264_cabac_tables.h
@ -141,11 +141,16 @@ typedef enum
    LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 417,
    COEFF_ABS_LEVEL_MINUS1_8X8 = 426,
    SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 436,
-    LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451
+    LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451,
+
+    /* SVC related CABAC offsets */
+    BASE_MODE_FLAG = 460,
+    MOTION_PREDICTION_FLAG_L0 = 463,
+    MOTION_PREDICTION_FLAG_L1 = 464,
+    RESIDUAL_PREDICTION_FLAG = 465,

 } cabac_table_num_t;

-
 /**
 ******************************************************************************
 *  @enum  ctxIdxOffset
--- a/common/ih264_defs.h
+++ b/common/ih264_defs.h
@ -135,6 +135,9 @@ enum
    ISLICE = 2,
    SPSLICE = 3,
    SISLICE = 4,
+    EPSLICE = 5,
+    EBSLICE = 6,
+    EISLICE = 7,
    MAXSLICE_TYPE,
 };

@ -144,27 +147,28 @@ enum
 *  @brief Defines the set of possible nal unit types
 ******************************************************************************
 */
-enum
+typedef enum NAL_UNIT_TYPE_T
 {
-    NAL_UNSPEC_0        = 0,
-    NAL_SLICE_NON_IDR   = 1,
-    NAL_SLICE_DPA       = 2,
-    NAL_SLICE_DPB       = 3,
-    NAL_SLICE_DPC       = 4,
-    NAL_SLICE_IDR       = 5,
-    NAL_SEI             = 6,
-    NAL_SPS             = 7,
-    NAL_PPS             = 8,
-    NAL_AUD             = 9,
-    NAL_EOSEQ           = 10,
-    NAL_EOSTR           = 11,
-    NAL_FILLER          = 12,
-    NAL_SPSE            = 13,
-    NAL_RES_18          = 14,
-    NAL_AUX_PIC         = 19,
-    NAL_RES_23          = 20,
-    NAL_UNSPEC_31       = 24,
-};
+    NAL_UNSPEC_0 = 0,
+    NAL_SLICE_NON_IDR = 1,
+    NAL_SLICE_DPA = 2,
+    NAL_SLICE_DPB = 3,
+    NAL_SLICE_DPC = 4,
+    NAL_SLICE_IDR = 5,
+    NAL_SEI = 6,
+    NAL_SPS = 7,
+    NAL_PPS = 8,
+    NAL_AUD = 9,
+    NAL_EOSEQ = 10,
+    NAL_EOSTR = 11,
+    NAL_FILLER = 12,
+    NAL_SPSE = 13,
+    NAL_PREFIX = 14,
+    NAL_SUBSET_SPS = 15,
+    NAL_AUX_PIC = 19,
+    NAL_CODED_SLICE_EXTENSION = 20,
+    NAL_UNSPEC_31 = 24,
+} NAL_UNIT_TYPE_T;

 /**
 ******************************************************************************
@ -261,27 +265,29 @@ typedef enum
 */
 typedef enum
 {
-    I16x16      = 0,
-    I4x4        = 1,
-    I8x8        = 2,
-    P16x16      = 3,
-    P16x8       = 4,
-    P8x16       = 5,
-    P8x8        = 6,
-    PSKIP       = 7,
-    IPCM        = 8,
-    B16x16      = 9,
-    BSKIP       = 10,
-    BDIRECT     = 11,
+    INVALID_MB_TYPE = -1,
+    I16x16 = 0,
+    I4x4 = 1,
+    I8x8 = 2,
+    P16x16 = 3,
+    P16x8 = 4,
+    P8x16 = 5,
+    P8x8 = 6,
+    PSKIP = 7,
+    IPCM = 8,
+    B16x16 = 9,
+    BSKIP = 10,
+    BDIRECT = 11,
+    BASE_MODE = 12,
    MAX_MBTYPES,
-}MBTYPES_T;
+} MBTYPES_T;

 /* Pred Modes */
 enum
 {
    BLOCK_TYPE_INTER_MB = 0,
    BLOCK_TYPE_INTRA_MB = 1,
-    BLOCK_TYPE_SKIP_MB  = 2
+    BLOCK_TYPE_SKIP_MB = 2
 };

 /* Prediction list */
@ -521,9 +527,16 @@ typedef enum
 /* Number of max TU in a MB row */
 #define MAX_TU_IN_MB_ROW   ((MB_SIZE / MIN_TU_SIZE))

+#define MIN_TU_IN_MB_ROW ((MB_SIZE / MAX_TU_SIZE))
+
 /* Number of max PU in a CTb row */
 #define MAX_PU_IN_MB_ROW   ((MB_SIZE / MIN_PU_SIZE))

+#define MAX_TU_IN_MB_COL MAX_TU_IN_MB_ROW
+
+#define MIN_TU_IN_MB_COL MIN_TU_IN_MB_ROW
+
+#define MAX_PU_IN_MB_COL MAX_PU_IN_MB_ROW

 /* Number of max PU in a MB */
 /*****************************************************************************/
@ -537,7 +550,11 @@ typedef enum
 #define MAX_TU_IN_MB       ((MB_SIZE / MIN_TU_SIZE) * \
                             (MB_SIZE / MIN_TU_SIZE))

+#define MIN_TU_IN_MB (MIN_TU_IN_MB_ROW * MIN_TU_IN_MB_COL)

+#define NUM_4x4_IN_8x8 4
+
+#define NUM_COEFFS_IN_MIN_TU (MIN_TU_SIZE * MIN_TU_SIZE)

 /**
 * Maximum transform depths
--- a/common/ih264_size_defs.h
+++ b/common/ih264_size_defs.h
@ -44,6 +44,8 @@
 /*Width of a 4x4 block*/
 #define SUB_BLK_WIDTH_4x4                   4

+#define SUB_BLK_HEIGHT_4x4                  4
+
 /*Width of an 8x8 block*/
 #define SUB_BLK_WIDTH_8x8                   8

--- a/common/svc/isvc_cabac_tables.c
+++ b/common/svc/isvc_cabac_tables.c
--- a/common/svc/isvc_cabac_tables.h
+++ b/common/svc/isvc_cabac_tables.h
@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file isvc_cabac_tables.h
+*
+* @brief
+*  This file contains enumerations, macros and extern declarations of H264
+*  cabac tables
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef _ISVC_CABAC_TABLES_H_
+#define _ISVC_CABAC_TABLES_H_
+
+#include "ih264_cabac_tables.h"
+/**
+******************************************************************************
+*  @brief  number of cabac contexts used for SVC (context indices 0-466)
+******************************************************************************
+*/
+#define NUM_SVC_CABAC_CTXTS 467
+
+extern const UWORD32 (*gau4_isvc_cabac_table)[4];
+
+/*****************************************************************************/
+/* Cabac tables for context initialization depending upon type of Slice,     */
+/* cabac init Idc value and Qp.                                              */
+/*****************************************************************************/
+extern const UWORD8 gau1_isvc_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE]
+                                                   [NUM_SVC_CABAC_CTXTS];
+
+#endif
--- a/common/svc/isvc_common_tables.c
+++ b/common/svc/isvc_common_tables.c
@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvc_common_tables.c
+*
+* @brief
+*  Contains common global tables
+*
+* @author
+*  Harish M
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_macros.h"
+#include "isvc_structs.h"
+#include "ih264_common_tables.h"
+#include "isvc_common_tables.h"
+
+/*****************************************************************************/
+/* Extern global definitions                                                 */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief  While encoding, the level of the bitstream is derived from the
+ * input configuration parameters using the table below.
+ * input  : table_idx
+ * output : level_idc or cpb size
+ * @remarks Table A-1 – level table limits
+ * NOTE(review): the meaning of each column is fixed by level_tables_t, which
+ * is declared outside this file - confirm the field order there.
+ ******************************************************************************
+ */
+const level_tables_t gas_isvc_lvl_tbl[16] = {
+    {IH264_LEVEL_10, 1485, 99, 396, 64, 175, 64},
+    {IH264_LEVEL_1B, 1485, 99, 396, 128, 350, 64},
+    {IH264_LEVEL_11, 3000, 396, 900, 192, 500, 128},
+    {IH264_LEVEL_12, 6000, 396, 2376, 384, 1000, 128},
+    {IH264_LEVEL_13, 11880, 396, 2376, 768, 2000, 128},
+    {IH264_LEVEL_20, 11880, 396, 2376, 2000, 2000, 128},
+    {IH264_LEVEL_21, 19800, 792, 4752, 4000, 4000, 256},
+    {IH264_LEVEL_22, 20250, 1620, 8100, 4000, 4000, 256},
+    {IH264_LEVEL_30, 40500, 1620, 8100, 10000, 10000, 256},
+    {IH264_LEVEL_31, 108000, 3600, 18000, 14000, 14000, 512},
+    {IH264_LEVEL_32, 216000, 5120, 20480, 20000, 20000, 512},
+    {IH264_LEVEL_40, 245760, 8192, 32768, 20000, 25000, 512},
+    {IH264_LEVEL_41, 245760, 8192, 32768, 50000, 62500, 512},
+    {IH264_LEVEL_42, 522240, 8704, 34816, 50000, 62500, 512},
+    {IH264_LEVEL_50, 589824, 22080, 110400, 135000, 135000, 512},
+    {IH264_LEVEL_51, 983040, 36864, 184320, 240000, 240000, 512},
+};
--- a/common/svc/isvc_common_tables.h
+++ b/common/svc/isvc_common_tables.h
@ -0,0 +1,50 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvc_common_tables.h
+*
+* @brief
+*  Common tables
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVC_COMMON_TABLES_H_
+#define _ISVC_COMMON_TABLES_H_
+
+/* Dependencies of ih264_common_tables.h */
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+
+#include "ih264_common_tables.h"
+
+extern const level_tables_t gas_isvc_lvl_tbl[16];
+
+#endif
--- a/common/svc/isvc_defs.h
+++ b/common/svc/isvc_defs.h
@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvc_defs.h
+*
+* @brief
+*  Contains macro definitions, and other typedefs used for SVC encoding
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVC_DEFS_H_
+#define _ISVC_DEFS_H_
+
+#define MAX_NUM_TEMPORAL_LAYERS 3
+
+#define MAX_NUM_SPATIAL_LAYERS 3
+
+#define MAX_VUI_EXT_NUM_ENTRIES (MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS)
+
+#define SVC_INTER_MB (1 << 0) /*!< P or B MBs decoded or inferred */
+
+#define SVC_INTRA_MB (1 << 1) /*!< Intra MBs other than IPCM and I_BL */
+
+#define SVC_IPCM_MB (1 << 2) /*!< IPCM_MB decoded or inferred */
+
+#define SVC_IBL_MB (1 << 3) /*!< I_BL MB always inferred */
+
+#define SVC_INTRA_INTER_MB                                         \
+    (1 << 4) /*!< Intra Inter MB will have an alternate prediction \
+                process*/
+
+#define MB_WIDTH_SHIFT 4
+
+#define MB_HEIGHT_SHIFT 4
+
+#define UV 1
+
+#define NUM_SP_COMPONENTS 2
+
+#define NUM_COMPONENTS 3
+
+/* Keeps the low five bits of x, i.e. the bit positions (0..4) used by the SVC_*_MB flags */
+#define SVC_EXTRACT_MB_MODE(x) ((x) &0x1F)
+
+/* Non-zero when bit (7 - y) of x is set; y = 0 therefore selects bit 7 */
+#define GET_BIT_TX_SIZE(x, y) ((x) & (1 << (7 - (y))))
+
+/* profile_idc values of the SVC profiles; 83 is the 'Scalable Baseline' profile */
+typedef enum SVC_PROFILES_T
+{
+    IH264_SCALABLE_BASELINE = 83,
+    IH264_SCALABLE_HIGH_PROFILE = 86
+} SVC_PROFILES_T;
+
+/* Prediction direction of a partition */
+typedef enum PRED_MODE_T
+{
+    L0 = 0,
+    L1 = 1,
+    BI = 2,
+    /* Same numeric value as BI; presumably the number of single-list */
+    /* directions (L0 and L1) - confirm against the users of this enum */
+    NUM_PRED_DIRS = 2,
+    INVALID_PRED_MODE = 4,
+} PRED_MODE_T;
+
+#endif
--- a/common/svc/isvc_inter_pred_filters.h
+++ b/common/svc/isvc_inter_pred_filters.h
@ -0,0 +1,219 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_inter_pred_filters.h
+ *
+ * @brief
+ *  Declarations of functions used for inter prediction
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  -ih264_inter_pred_luma_copy
+ *  -ih264_interleave_copy
+ *  -ih264_inter_pred_luma_horz
+ *  -ih264_inter_pred_luma_vert
+ *  -ih264_inter_pred_luma_horz_hpel_vert_hpel
+ *  -ih264_inter_pred_luma_vert_qpel
+ *  -ih264_inter_pred_luma_horz_qpel
+ *  -ih264_inter_pred_luma_horz_qpel_vert_qpel
+ *  -ih264_inter_pred_luma_horz_qpel_vert_hpel
+ *  -ih264_inter_pred_luma_horz_hpel_vert_qpel
+ *  -ih264_inter_pred_luma_bilinear
+ *  -ih264_inter_pred_chroma
+ *  -ih264_inter_pred_luma_copy_a9q
+ *  -ih264_interleave_copy_a9
+ *  -ih264_inter_pred_luma_horz_a9q
+ *  -ih264_inter_pred_luma_vert_a9q
+ *  -ih264_inter_pred_luma_bilinear_a9q
+ *  -ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q
+ *  -ih264_inter_pred_luma_horz_qpel_a9q
+ *  -ih264_inter_pred_luma_vert_qpel_a9q
+ *  -ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q
+ *  -ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q
+ *  -ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q
+ *  -ih264_inter_pred_chroma_a9q
+ *  -ih264_inter_pred_luma_copy_av8
+ *  -ih264_interleave_copy_av8
+ *  -ih264_inter_pred_luma_horz_av8
+ *  -ih264_inter_pred_luma_vert_av8
+ *  -ih264_inter_pred_luma_bilinear_av8
+ *  -ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+ *  -ih264_inter_pred_luma_horz_qpel_av8
+ *  -ih264_inter_pred_luma_vert_qpel_av8
+ *  -ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+ *  -ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+ *  -ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+ *  -ih264_inter_pred_chroma_av8
+ *  -ih264_inter_pred_chroma_dx_zero_av8
+ *  -ih264_inter_pred_chroma_dy_zero_av8
+ *  -ih264_inter_pred_luma_copy_ssse3
+ *  -ih264_inter_pred_luma_horz_ssse3
+ *  -ih264_inter_pred_luma_vert_ssse3
+ *  -ih264_inter_pred_luma_bilinear_ssse3
+ *  -ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3
+ *  -ih264_inter_pred_luma_horz_qpel_ssse3
+ *  -ih264_inter_pred_luma_vert_qpel_ssse3
+ *  -ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3
+ *  -ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3
+ *  -ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3
+ *  -ih264_inter_pred_chroma_ssse3
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVC_INTER_PRED_FILTERS_H_
+#define _ISVC_INTER_PRED_FILTERS_H_
+
+/*****************************************************************************/
+/* Constant Data variables                                                   */
+/*****************************************************************************/
+
+extern const WORD32 ih264_g_six_tap[3]; /* coefficients for 6 tap filtering*/
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/* Signature shared by the ih264_inter_pred_luma_* kernels declared below, */
+/* except the bilinear one                                                  */
+typedef void FT_INTER_PRED_LUMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
+                                WORD32 ht, WORD32 wd, UWORD8 *pu1_tmp, WORD32 dydx);
+
+/* Signature shared by the ih264_interleave_copy* kernels declared below */
+typedef void FT_INTERLEAVE_COPY(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
+                                WORD32 ht, WORD32 wd);
+
+/* Signature shared by the ih264_inter_pred_luma_bilinear* kernels declared */
+/* below; takes two sources, each with its own stride                       */
+typedef void FT_INTER_PRED_LUMA_BILINEAR(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst,
+                                         WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd,
+                                         WORD32 height, WORD32 width);
+
+/* Signature shared by the ih264_inter_pred_chroma* kernels declared below */
+typedef void FT_INTER_PRED_CHROMA(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd,
+                                  WORD32 dst_strd, WORD32 dx, WORD32 dy, WORD32 ht, WORD32 wd);
+
+/* No NEON Declarations */
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy;
+
+FT_INTERLEAVE_COPY ih264_interleave_copy;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel;
+
+FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear;
+
+FT_INTER_PRED_CHROMA ih264_inter_pred_chroma;
+
+/* A9 NEON Declarations */
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_a9q;
+
+FT_INTERLEAVE_COPY ih264_interleave_copy_a9;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_a9q;
+
+FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q;
+
+FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_a9q;
+
+/* AV8 NEON Declarations */
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_av8;
+
+FT_INTERLEAVE_COPY ih264_interleave_copy_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_av8;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_av8;
+
+FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_av8;
+
+FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dx_zero_av8;
+
+FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_dy_zero_av8;
+
+/* SSSE3 Intrinsic Declarations */
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_copy_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_ssse3;
+
+FT_INTER_PRED_LUMA_BILINEAR ih264_inter_pred_luma_bilinear_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_vert_qpel_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
+
+FT_INTER_PRED_LUMA ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
+
+FT_INTER_PRED_CHROMA ih264_inter_pred_chroma_ssse3;
+
+/** Nothing past this point */
+
+#endif
--- a/common/svc/isvc_intra_resample.c
+++ b/common/svc/isvc_intra_resample.c
--- a/common/svc/isvc_intra_resample.h
+++ b/common/svc/isvc_intra_resample.h
@ -0,0 +1,251 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#ifndef _ISVC_INTRA_RESAMPLE_H_
+#define _ISVC_INTRA_RESAMPLE_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_macros.h"
+#include "ih264_platform_macros.h"
+#include "isvc_structs.h"
+
+#define DYADIC_REF_W_Y 20
+#define DYADIC_REF_H_Y 20
+#define DYADIC_REF_W_C 10
+#define DYADIC_REF_H_C 10
+
+#define MAX_NUM_RES_LYRS 4 /* number of entries in intra_sampling_ctxt_t::as_res_lyrs */
+
+#define MAX_PIX_FILL_LUMA 4
+#define MAX_PIX_FILL_CHROMA 2
+
+#define MAX_REF_ARR_WD_HT 48
+#define MAX_REF_IDX_ARRAY (MAX_REF_ARR_WD_HT + MB_SIZE) /* length of au1_refarray_{x,y}_idx */
+
+#define CLIPUCHAR(x) CLIP3(0, 255, (x)) /* CLIP3 of x against the bounds 0 and 255 */
+
+#define REF_ARRAY_WIDTH 48 /* NOTE(review): same value as MAX_REF_ARR_WD_HT - confirm intent */
+#define REF_ARRAY_HEIGHT 48
+
+typedef void FT_INTERPOLATE_LUMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                    UWORD8 *pu1_out_buf, WORD32 i4_out_stride);
+
+typedef void FT_VERT_INTERPOLATE_CHROMA_2X(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                           WORD32 i4_phase_0, WORD32 i4_phase_1);
+
+typedef void FT_HORZ_INTERPOLATE_CHROMA_2X(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
+                                           WORD32 i4_out_stride, WORD32 i4_phase_0,
+                                           WORD32 i4_phase_1);
+
+typedef struct mem_element_t
+{
+    /* Pointer to the start of the buffer */
+    void *pv_buffer;
+
+    /* Size of one element (structure or unit) held in the buffer */
+    WORD32 i4_element_size;
+
+    /* Stride of the buffer, expressed as a number of elements */
+    WORD32 i4_num_element_stride;
+} mem_element_t;
+
+typedef struct seg_description_t
+{
+    /* Dimension of the segment */
+    UWORD8 u1_seg_dim;
+
+    /* Offset of the segment from the start */
+    UWORD8 u1_seg_off;
+
+    /* Whether an MB adjoins the segment:
+       0 => not adjoining, 1 => adjoining */
+    UWORD8 u1_mb_adjoin;
+
+    /* Distance to the nearest MB */
+    WORD8 i1_dist_idx;
+
+    /* Which MB boundary is nearest:
+       +1 => right MB / bottom MB
+       -1 => left MB / top MB */
+    WORD8 i1_nearst_mb_bdry;
+} seg_description_t;
+
+typedef struct seg_lookup_desc_t
+{
+    /* Number of valid entries in s_segments */
+    UWORD8 u1_num_segments;
+
+    /* Indicates where the start location of the segment lies, i.e. whether it
+       is less than or greater than the block width.
+       NOTE(review): the field carries a u4_ prefix but is declared UWORD8 */
+    UWORD8 u4_start_pos;
+
+    /* Per-segment descriptions (at most 4 segments) */
+    seg_description_t s_segments[4];
+} seg_lookup_desc_t;
+
+typedef struct intra_samp_lyr_ctxt
+{
+    /* MB position */
+    coordinates_t *ps_mb_pos;
+
+    /* Reference layer width in terms of luma samples */
+    WORD32 i4_ref_width;
+
+    /* Reference layer height in terms of luma samples */
+    WORD32 i4_ref_height;
+
+    /* Constrained intra resampling flag. Range is [0,1]. */
+    WORD8 i1_constrained_intra_rsmpl_flag;
+
+    /* Chroma xPhase for even values of x for dyadic cases */
+    WORD32 i4_x_phase_0;
+
+    /* Chroma xPhase for odd values of x for dyadic cases */
+    WORD32 i4_x_phase_1;
+
+    /* Chroma yPhase for even values of y for dyadic cases */
+    WORD32 i4_y_phase_0;
+
+    /* Chroma yPhase for odd values of y for dyadic cases */
+    WORD32 i4_y_phase_1;
+
+    FT_INTERPOLATE_LUMA_2X *pf_interpolate_luma; /* luma routine of type FT_INTERPOLATE_LUMA_2X */
+
+    FT_VERT_INTERPOLATE_CHROMA_2X *pf_vert_interpol_chroma; /* vertical chroma routine */
+
+    FT_HORZ_INTERPOLATE_CHROMA_2X *pf_horz_interpol_chroma; /* horizontal chroma routine */
+
+    WORD16 i2_x_min_pos;
+
+    WORD16 i2_x_max_pos;
+
+    WORD16 i2_y_min_pos;
+
+    WORD16 i2_y_max_pos;
+
+    coordinates_t *ps_phase;
+
+    WORD32 *pi4_ref_array_positions_x;
+
+    WORD32 *pi4_ref_array_positions_y;
+
+    coordinates_t *ps_offsets;
+
+    coordinates_t *ps_ref_array_dims;
+
+    /* Lookup table of horizontal segment descriptions, MB_SIZE entries */
+    seg_lookup_desc_t as_seg_lookup_horz[MB_SIZE];
+
+    /* Lookup table of vertical segment descriptions, MB_SIZE entries */
+    seg_lookup_desc_t as_seg_lookup_vert[MB_SIZE];
+
+    /* Lookup of x indices used to fetch availability
+       from the 4x4 availability grid */
+    UWORD8 au1_refarray_x_idx[MAX_REF_IDX_ARRAY];
+
+    /* Lookup of y indices used to fetch availability
+       from the 4x4 availability grid */
+    UWORD8 au1_refarray_y_idx[MAX_REF_IDX_ARRAY];
+} intra_samp_lyr_ctxt;
+
+typedef struct intra_sampling_ctxt_t
+{
+    /* Per-resolution-layer contexts, MAX_NUM_RES_LYRS entries */
+    intra_samp_lyr_ctxt as_res_lyrs[MAX_NUM_RES_LYRS];
+
+    /* Pointer to array of SPS (stored untyped) */
+    void *ps_sps;
+
+    /* Buffer to store the reference layer data before intra sampling */
+    UWORD8 *pu1_refarray_buffer;
+
+    /* Buffer to hold the reference layer Cb data before intra
+       resampling (used for dyadic cases only) */
+    UWORD8 *pu1_refarray_cb;
+
+    /* Buffer to hold the reference layer Cr data before intra
+       resampling (used for dyadic cases only) */
+    UWORD8 *pu1_refarray_cr;
+
+    /* Intermediate buffer for interpolation */
+    WORD32 *pi4_temp_interpolation_buffer;
+
+    /* Resolution id of the layer which is to be processed */
+    WORD32 i4_res_lyr_id;
+
+    /* Reference layer width in terms of luma samples */
+    WORD32 i4_ref_width;
+
+    /* Presumably the stride of the reference array buffer; the previous comment
+       here repeated the i4_ref_width text - TODO confirm against users */
+    WORD32 i4_refarray_stride;
+
+    /* Reference layer height in terms of luma samples */
+    WORD32 i4_ref_height;
+} intra_sampling_ctxt_t;
+
+typedef struct inter_lyr_mb_prms_t
+{
+    /* NNZs of chroma. Each bit corresponds to the NNZ
+       of one 4x4 sub block. The lower 4 bits are
+       used for Cb and the upper 4 bits for Cr */
+    UWORD8 u1_chroma_nnz;
+
+    /* NNZs of luma. Each bit corresponds to the NNZ
+       of one 4x4 sub block, in raster scan order. */
+    UWORD16 u2_luma_nnz;
+
+    /* Packed MB mode and transform size of an MB */
+    WORD8 i1_mb_mode;
+} inter_lyr_mb_prms_t;
+
+/* Function declarations. The context arguments are passed as void pointers;
+   presumably they refer to intra_sampling_ctxt_t - TODO confirm in the .c file */
+extern void isvc_intra_samp_mb_dyadic(void *pv_intra_samp_ctxt, mem_element_t *ps_ref_luma,
+                                      mem_element_t *ps_ref_chroma,
+                                      mem_element_t *ps_ref_mb_mode_map,
+                                      mem_element_t *ps_curr_luma, mem_element_t *ps_curr_chroma,
+                                      UWORD16 u2_mb_x, UWORD16 u2_mb_y,
+                                      WORD32 i4_scaled_ref_layer_left_offset,
+                                      WORD32 i4_scaled_ref_layer_top_offset);
+
+extern void isvc_intra_samp_mb(void *pv_intra_samp_ctxt_luma, void *pv_intra_samp_ctxt_chroma,
+                               mem_element_t *ps_ref_luma, mem_element_t *ps_ref_chroma,
+                               mem_element_t *ps_ref_mb_mode_map, mem_element_t *ps_curr_luma,
+                               mem_element_t *ps_curr_chroma);
+
+extern void isvc_intra_resamp_generate_segment_lookup(seg_lookup_desc_t *ps_seg_lookup_table,
+                                                      WORD32 i4_dimension, WORD32 i4_mb_size,
+                                                      WORD32 i4_shift_val);
+
+/* C declarations (no architecture suffix) */
+extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic;
+extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic;
+extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic;
+
+/* SSE4.2 declarations (suffix _sse42) */
+extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_sse42;
+extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_sse42;
+extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_sse42;
+
+/* NEON declarations (suffix _neon) */
+extern FT_INTERPOLATE_LUMA_2X isvc_interpolate_base_luma_dyadic_neon;
+extern FT_VERT_INTERPOLATE_CHROMA_2X isvc_vert_interpol_chroma_dyadic_neon;
+extern FT_HORZ_INTERPOLATE_CHROMA_2X isvc_horz_interpol_chroma_dyadic_neon;
+
+#endif
--- a/common/svc/isvc_iquant_itrans_recon.c
+++ b/common/svc/isvc_iquant_itrans_recon.c
--- a/common/svc/isvc_macros.h
+++ b/common/svc/isvc_macros.h
@ -0,0 +1,37 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvc_macros.h
+*
+* @brief
+*  Contains macro definitions used in SVC
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVC_MACROS_H_
+#define _ISVC_MACROS_H_
+
+#define FORCEINLINE __attribute__((always_inline)) inline /* GCC/Clang attribute syntax */
+
+#endif
--- a/common/svc/isvc_mem_fns.c
+++ b/common/svc/isvc_mem_fns.c
@ -0,0 +1,317 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_mem_fns.c
+ *
+ * @brief
+ *  Functions used for memory operations
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  isvc_copy_2d()
+ *  isvc_memset_2d()
+ *  isvc_interleaved_copy()
+ *  isvc_16bit_interleaved_copy()
+ *  isvc_16bit_interleaved_memset()
+ *  isvc_is_nonzero_blk()
+ *
+ * @remarks
+ *  None
+ *
+ ******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "isvc_mem_fns.h"
+
+/**
+********************************************************************************
+*  @brief  Copies a 2D block from one buffer to another, one row at a time
+*
+*  @par Description:
+*   Each row is transferred with memmove(), so a source row that overlaps its
+*   destination row is still copied correctly.
+*
+*  @param[out] pu1_dst : pointer to the first element of the destination block
+*
+*  @param[in] i4_dst_stride : step, in UWORD8 elements, between destination rows
+*
+*  @param[in] pu1_src : pointer to the first element of the source block
+*
+*  @param[in] i4_src_stride : step, in UWORD8 elements, between source rows
+*
+*  @param[in] i4_blk_wd : number of elements copied per row
+*
+*  @param[in] i4_blk_ht : number of rows copied; no row is copied when <= 0
+*
+*  @return void
+********************************************************************************
+*/
+
+void isvc_copy_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src, WORD32 i4_src_stride,
+                  WORD32 i4_blk_wd, WORD32 i4_blk_ht)
+{
+    WORD32 i4_rows_left = i4_blk_ht;
+
+    while(i4_rows_left > 0)
+    {
+        memmove(pu1_dst, pu1_src, i4_blk_wd * sizeof(pu1_dst[0]));
+
+        pu1_src += i4_src_stride;
+        pu1_dst += i4_dst_stride;
+        i4_rows_left--;
+    }
+}
+
+/**
+********************************************************************************
+*  @brief  Fills a 2D block with a constant value, one row at a time
+*
+*  @param[out] pu1_dst : pointer to the first element of the block to fill
+*
+*  @param[in] i4_dst_stride : step, in UWORD8 elements, between consecutive rows
+*
+*  @param[in] u1_val : value written to every element of the block
+*
+*  @param[in] i4_blk_wd : number of elements filled per row
+*
+*  @param[in] i4_blk_ht : number of rows filled; no row is filled when <= 0
+*
+*  @return void
+********************************************************************************
+*/
+void isvc_memset_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
+                    WORD32 i4_blk_ht)
+{
+    UWORD8 *pu1_row = pu1_dst;
+    WORD32 i4_rows_left;
+
+    for(i4_rows_left = i4_blk_ht; i4_rows_left > 0; i4_rows_left--)
+    {
+        memset(pu1_row, u1_val, i4_blk_wd);
+        pu1_row += i4_dst_stride;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Copies every other byte of a 2D block into the matching positions of an
+ *  interleaved destination
+ *
+ * @par Description:
+ *  For each of the 'ht' rows, the bytes at even offsets 0, 2, ..., 2 * (wd - 1)
+ *  are copied from 'pu1_src' to the same offsets in 'pu1_dst'
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ht
+ *  number of rows to process
+ *
+ * @param[in] wd
+ *  number of elements copied per row (each occupies every second byte)
+ *
+ * @returns
+ *  void
+ *
+ * @remarks
+ *  Bytes at odd offsets of the destination are not touched.
+ *  NOTE(review): isvc_mem_fns.h carries no declaration for this function.
+ *
+ *******************************************************************************
+ */
+void isvc_interleaved_copy(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
+                           WORD32 ht, WORD32 wd)
+{
+    WORD32 i4_row = 0;
+
+    while(i4_row < ht)
+    {
+        WORD32 i4_elem;
+
+        /* element k lives at byte offset 2 * k of the interleaved row */
+        for(i4_elem = 0; i4_elem < wd; i4_elem++)
+        {
+            pu1_dst[2 * i4_elem] = pu1_src[2 * i4_elem];
+        }
+
+        pu1_dst += dst_strd;
+        pu1_src += src_strd;
+        i4_row++;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Copies every other 16-bit element of a 2D block into the matching
+ *  positions of an interleaved destination
+ *
+ * @par Description:
+ *  For each of the 'ht' rows, the elements at even indices
+ *  0, 2, ..., 2 * (wd - 1) are copied from 'pi2_src' to the same indices in
+ *  'pi2_dst'
+ *
+ * @param[in] pi2_src
+ *  WORD16 pointer to the source
+ *
+ * @param[out] pi2_dst
+ *  WORD16 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride, in WORD16 elements
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in WORD16 elements
+ *
+ * @param[in] ht
+ *  number of rows to process
+ *
+ * @param[in] wd
+ *  number of elements copied per row (each occupies every second index)
+ *
+ * @returns
+ *  void
+ *
+ * @remarks
+ *  Elements at odd indices of the destination are not touched
+ *
+ *******************************************************************************
+ */
+void isvc_16bit_interleaved_copy(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 dst_strd,
+                                 WORD32 ht, WORD32 wd)
+{
+    WORD32 i4_row = 0;
+
+    while(i4_row < ht)
+    {
+        WORD32 i4_elem;
+
+        /* element k lives at index 2 * k of the interleaved row */
+        for(i4_elem = 0; i4_elem < wd; i4_elem++)
+        {
+            pi2_dst[2 * i4_elem] = pi2_src[2 * i4_elem];
+        }
+
+        pi2_dst += dst_strd;
+        pi2_src += src_strd;
+        i4_row++;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets every other 16-bit element of a 2D interleaved block to a value
+ *
+ * @par Description:
+ *  For each of the 'i4_ht' rows, the elements at even indices
+ *  0, 2, ..., 2 * (i4_wd - 1) of 'pi2_src' are overwritten with 'i2_value'
+ *
+ * @param[in,out] pi2_src
+ *  WORD16 pointer to the block being written
+ *
+ * @param[in] i4_src_strd
+ *  integer stride of the block, in WORD16 elements
+ *
+ * @param[in] i2_value
+ *  Value to set
+ *
+ * @param[in] i4_wd
+ *  number of elements set per row (each occupies every second index)
+ *
+ * @param[in] i4_ht
+ *  number of rows to process
+ *
+ * @returns
+ *  void
+ *
+ * @remarks
+ *  Elements at odd indices are not touched. Width precedes height in this
+ *  signature, unlike isvc_16bit_interleaved_copy().
+ *
+ *******************************************************************************
+ */
+void isvc_16bit_interleaved_memset(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
+                                   WORD32 i4_wd, WORD32 i4_ht)
+{
+    WORD32 i4_row = 0;
+
+    while(i4_row < i4_ht)
+    {
+        WORD32 i4_elem;
+
+        /* element k lives at index 2 * k of the interleaved row */
+        for(i4_elem = 0; i4_elem < i4_wd; i4_elem++)
+        {
+            pi2_src[2 * i4_elem] = i2_value;
+        }
+
+        pi2_src += i4_src_strd;
+        i4_row++;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Checks if any pixel in a block is non-zero
+ *
+ * @param[in] pu1_data
+ *  UWORD8 pointer to the block to be checked
+ *
+ * @param[in] i4_data_strd
+ *  Stride of data buffer, in UWORD8 elements; may be negative
+ *
+ * @param[in] u4_wd
+ *  Width of the block
+ *
+ * @param[in] u4_ht
+ *  Height of the block
+ *
+ * @returns
+ *  1 as soon as a non-zero element is found, 0 if every element of the block
+ *  is zero (including the case of an empty block)
+ *
+ * @remarks
+ *  Rows are reached by stepping the row pointer with the signed stride.
+ *  Indexing with 'j + i * i4_data_strd' would convert the stride to unsigned
+ *  and turn a negative stride into a huge positive offset.
+ *
+ *******************************************************************************
+ */
+UWORD8 isvc_is_nonzero_blk(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd, UWORD32 u4_ht)
+{
+    UWORD32 i, j;
+
+    for(i = 0; i < u4_ht; i++)
+    {
+        for(j = 0; j < u4_wd; j++)
+        {
+            if(pu1_data[j])
+            {
+                return 1;
+            }
+        }
+
+        /* signed pointer step keeps negative strides correct */
+        pu1_data += i4_data_strd;
+    }
+
+    return 0;
+}
--- a/common/svc/isvc_mem_fns.h
+++ b/common/svc/isvc_mem_fns.h
@ -0,0 +1,109 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvc_mem_fns.h
+*
+* @brief
+*  Function declarations used for memory functions
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _ISVC_MEM_FNS_H_
+#define _ISVC_MEM_FNS_H_
+
+#include "ih264_typedefs.h"
+
+typedef void *FT_MEM_ALLOC(UWORD32 u4_size);
+
+typedef void FT_MEM_FREE(void *pv_mem);
+
+typedef void FT_MEMCPY(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void FT_COPY_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
+                        WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht);
+
+typedef void FT_MEMSET_2D(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
+                          WORD32 i4_blk_ht);
+
+typedef void FT_MEMSET(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+typedef void FT_MEMSET_16BIT(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words);
+
+typedef void FT_16BIT_INTERLEAVED_COPY(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd,
+                                       WORD32 dst_strd, WORD32 ht, WORD32 wd); /* height, then width */
+
+typedef void FT_16BIT_INTERLEAVED_MEMSET(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
+                                         WORD32 i4_wd, WORD32 i4_ht); /* width, then height */
+
+typedef UWORD8 FT_NONZERO_CHECKER(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32 u4_wd,
+                                  UWORD32 u4_ht);
+
+/* C function declarations. Of these, isvc_mem_fns.c defines isvc_copy_2d,
+   isvc_memset_2d, isvc_16bit_interleaved_copy, isvc_16bit_interleaved_memset
+   and isvc_is_nonzero_blk; the remaining symbols are defined elsewhere. */
+extern FT_MEMCPY ih264_memcpy;
+extern FT_MEMCPY ih264_memcpy_mul_8;
+extern FT_MEMSET ih264_memset;
+extern FT_MEMSET ih264_memset_mul_8;
+extern FT_MEMSET_16BIT ih264_memset_16bit;
+extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8;
+extern FT_COPY_2D isvc_copy_2d;
+extern FT_MEMSET_2D isvc_memset_2d;
+extern FT_16BIT_INTERLEAVED_COPY isvc_16bit_interleaved_copy;
+extern FT_16BIT_INTERLEAVED_MEMSET isvc_16bit_interleaved_memset;
+extern FT_NONZERO_CHECKER isvc_is_nonzero_blk;
+extern FT_MEM_ALLOC isvc_memory_alloc;
+extern FT_MEM_FREE isvc_memory_free;
+
+/* A9 Q function declarations.
+   NOTE(review): isvc_memcpy_a9q is the only entry here with an isvc_ prefix,
+   while its siblings use ih264_ - confirm that the symbol exists. */
+extern FT_MEMCPY isvc_memcpy_a9q;
+extern FT_MEMCPY ih264_memcpy_mul_8_a9q;
+extern FT_MEMSET ih264_memset_a9q;
+extern FT_MEMSET ih264_memset_mul_8_a9q;
+extern FT_MEMSET_16BIT ih264_memset_16bit_a9q;
+extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_a9q;
+
+/* AV8 function declarations */
+extern FT_MEMCPY ih264_memcpy_av8;
+extern FT_MEMCPY ih264_memcpy_mul_8_av8;
+extern FT_MEMSET ih264_memset_av8;
+extern FT_MEMSET ih264_memset_mul_8_av8;
+extern FT_MEMSET_16BIT ih264_memset_16bit_av8;
+extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_av8;
+
+/* NEON function declarations */
+extern FT_MEMSET_2D isvc_memset_2d_neon;
+
+/* SSSE3 variants */
+extern FT_MEMCPY ih264_memcpy_mul_8_ssse3;
+extern FT_MEMSET ih264_memset_mul_8_ssse3;
+extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_ssse3;
+extern FT_COPY_2D isvc_copy_2d_ssse3;
+
+/* SSE4.2 variants */
+extern FT_MEMSET_2D isvc_memset_2d_sse42;
+
+#endif
--- a/common/svc/isvc_resi_trans_quant.c
+++ b/common/svc/isvc_resi_trans_quant.c
@ -0,0 +1,840 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_resi_trans_quant.c
+ *
+ * @brief
+ *  Contains function definitions single stage  forward transform for H.264
+ *  It will calculate the residue, do the cf and then do quantization
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  - ih264_resi_trans_quant_4x4()
+ *  - ih264_resi_trans_quant_chroma_4x4
+ *  - ih264_hadamard_quant_4x4
+ *  - ih264_hadamard_quant_2x2_uv
+ *  - ih264_resi_trans_quant_8x8
+ *
+ * @remarks
+ *******************************************************************************
+ */
+/* System include files */
+#include <stdbool.h>
+#include <stddef.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_macros.h"
+#include "ih264_trans_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+
+/* Returns (i2_residue - i2_upsampled_res), limited to [-UINT8_MAX, UINT8_MAX] via CLIP3 */
+static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res)
+{
+    const WORD32 i4_limit = (WORD16) UINT8_MAX;
+    const WORD32 i4_diff = i2_residue - i2_upsampled_res;
+
+    return (CLIP3(-i4_limit, i4_limit, i4_diff));
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   This function performs forward transform and quantization on a 4*4 block
+ *
+ * @par Description:
+ *   The function reads a source block and a prediction block and computes
+ *   their difference (the residue). When u1_use_upsampled_res is non-zero, the
+ *   upsampled residual is additionally subtracted from the residue through
+ *   isvc_subtract_upsampled_res(). The residue is transformed horizontally
+ *   into the output buffer, then transformed vertically and passed through
+ *   FWD_QUANT in place in that same buffer.
+ *
+ * @param[in] ps_src
+ *   Source block: pv_data is read as UWORD8 samples, i4_data_stride is the
+ *   step between rows
+ *
+ * @param[in] ps_pred
+ *   Prediction block: pv_data is read as UWORD8 samples, i4_data_stride is
+ *   the step between rows
+ *
+ * @param[out] ps_out
+ *   Output block: pv_data is written as 16 WORD16 values with 4 values per
+ *   row; i4_data_stride of this container is not read here
+ *
+ * @param[in] ps_upsampled_res
+ *   Upsampled residual block (WORD16 data). May be NULL, in which case the
+ *   data pointer is NULL and the stride is taken as 0; the data is only
+ *   dereferenced when u1_use_upsampled_res is non-zero
+ *
+ * @param[in] ps_quant_constants
+ *   Supplies pu2_scale_matrix, pu2_threshold_matrix, u4_qbits and
+ *   u4_round_factor, which are passed to FWD_QUANT
+ *
+ * @param[out] pu1_nnz
+ *   Receives the non-zero coefficient count accumulated by FWD_QUANT
+ *
+ * @param[out] pi2_dc_out
+ *   Receives the DC coefficient of the transform, stored before it is
+ *   quantized
+ *
+ * @param[in] u1_use_upsampled_res
+ *   Non-zero to subtract the upsampled residual from the prediction error
+ *
+ * @returns
+ *   void
+ *
+ * @remarks
+ *   FWD_QUANT is defined outside this file; it is expected to quantize
+ *   i4_value and update u4_nonzero_coeff in place - TODO confirm.
+ *
+ *******************************************************************************
+ */
+void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
+                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
+                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
+{
+    UWORD32 i;
+    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
+    WORD32 i4_value;
+
+    UWORD8 *pu1_src = ps_src->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    WORD16 *pi2_out = ps_out->pv_data;
+    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
+    WORD16 *pi2_out_tmp = pi2_out;
+    UWORD32 u4_nonzero_coeff = 0;
+    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
+    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
+    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
+    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
+
+    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        /* computing prediction error (residue) for one row of 4 samples */
+        x4 = pu1_src[0] - pu1_pred[0];
+        x5 = pu1_src[1] - pu1_pred[1];
+        x6 = pu1_src[2] - pu1_pred[2];
+        x7 = pu1_src[3] - pu1_pred[3];
+
+        if(u1_use_upsampled_res)
+        {
+            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
+            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
+            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
+            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
+        }
+
+        /* Horizontal transform */
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        pi2_out_tmp[0] = x0 + x1;
+        pi2_out_tmp[1] = (x3 << 1) + x2;
+        pi2_out_tmp[2] = x0 - x1;
+        pi2_out_tmp[3] = x3 - (x2 << 1);
+
+        /* pointing to next row; output rows are packed 4 values apart, and the
+           upsampled residual stride is 0 when ps_upsampled_res is NULL */
+        pu1_src += i4_src_stride;
+        pu1_pred += i4_pred_stride;
+        pi2_out_tmp += 4;
+        pi2_upsampled_res += i4_upsampled_res_stride;
+    }
+
+    pi2_out_tmp = pi2_out;
+
+    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        /* Vertical transform and quantization of column i
+           (rows are 4 values apart, hence indices 0, 4, 8, 12) */
+        x4 = pi2_out_tmp[0];
+        x5 = pi2_out_tmp[4];
+        x6 = pi2_out_tmp[8];
+        x7 = pi2_out_tmp[12];
+
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        /* quantization is done in place */
+
+        i4_value = x0 + x1;
+
+        if(i == 0)
+        {
+            (*pi2_dc_out) = i4_value;
+        }
+
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[0] = i4_value;
+
+        i4_value = (x3 << 1) + x2;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[4] = i4_value;
+
+        i4_value = x0 - x1;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[8] = i4_value;
+
+        i4_value = x3 - (x2 << 1);
+        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
+                  u4_qbits, u4_nonzero_coeff);
+        pi2_out_tmp[12] = i4_value;
+
+        pi2_out_tmp++;
+        pu2_scale_matrix++;
+        pu2_threshold_matrix++;
+    }
+
+    /* Return total nonzero coefficients in the current sub block */
+    *pu1_nnz = u4_nonzero_coeff;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   This function performs forward transform and quantization on a 4*4 chroma
+ *block with interleaved values
+ *
+ * @par Description:
+ *   The function accepts source buffer and estimation buffer. From these, it
+ *   computes the residue. This is residue is then transformed and quantized.
+ *   The transform and quantization are in placed computed. They use the residue
+ *   buffer for this.
+ *
+ * @param[in] pu1_src
+ *   Pointer to source sub-block
+ *
+ * @param[in] pu1_pred
+ *   Pointer to prediction sub-block
+ *
+ * @param[in] pi2_out
+ *   Pointer to residual sub-block
+ *
+ * @param[in] i4_src_stride
+ *   Source stride
+ *
+ * @param[in] i4_pred_stride
+ *   Prediction stride
+ *
+ * @param[in] dst_strd
+ *   Destination stride
+ *
+ * @param[in] u4_qbits
+ *    QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] pu2_threshold_matrix
+ *   Pointer to Forward Quant Threshold Matrix
+ *
+ * @param[in] pu2_scale_matrix
+ *   Pointer to Forward Quant Scale Matrix
+ *
+ * @param[in] u4_round_factor
+ *   Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ *   Total non-zero coefficients in the current sub-block
+ *
+ * @returns
+ *
+ * @remarks
+ *   None
+ *
+ *******************************************************************************
+ */
+void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                      buffer_container_t *ps_out,
+                                      buffer_container_t *ps_upsampled_res,
+                                      resi_trans_quant_constants_t *ps_quant_constants,
+                                      UWORD8 *pu1_nnz, WORD16 *pi2_dc_out,
+                                      UWORD8 u1_use_upsampled_res)
+{
+    /*
+     * Residue computation, 4x4 forward transform and quantization for one
+     * chroma plane.
+     *
+     * ps_src, ps_pred     : source and prediction; every second sample of a
+     *                       row is read
+     * ps_out              : receives the 16 quantized coefficients, written
+     *                       contiguously (4 per row)
+     * ps_upsampled_res    : residual subtracted from the residue when
+     *                       u1_use_upsampled_res is non-zero; may be NULL
+     *                       otherwise
+     * ps_quant_constants  : scale matrix, threshold matrix, qbits and round
+     *                       factor handed to FWD_QUANT
+     * pu1_nnz             : receives the non-zero coefficient count
+     * pi2_dc_out          : receives the DC coefficient before quantization
+     */
+    UWORD32 i;
+    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
+    WORD32 i4_value;
+
+    UWORD8 *pu1_src = ps_src->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    WORD16 *pi2_out = ps_out->pv_data;
+    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
+    WORD16 *pi2_out_tmp = pi2_out;
+    UWORD32 u4_nonzero_coeff = 0;
+    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
+    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
+    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
+    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
+
+    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        /* computing prediction error (residue); only every second sample of */
+        /* the row belongs to the plane being processed */
+        x4 = pu1_src[0] - pu1_pred[0];
+        x5 = pu1_src[2] - pu1_pred[2];
+        x6 = pu1_src[4] - pu1_pred[4];
+        x7 = pu1_src[6] - pu1_pred[6];
+
+        if(u1_use_upsampled_res)
+        {
+            /* The residual is read at the same two-sample pitch as src and */
+            /* pred so that every residue is paired with its own column */
+            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
+            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[2]);
+            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[4]);
+            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[6]);
+        }
+
+        /* Horizontal transform. Doubling is written as '* 2' because x2 and */
+        /* x3 can be negative and left-shifting a negative value is undefined */
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        pi2_out_tmp[0] = x0 + x1;
+        pi2_out_tmp[1] = (x3 * 2) + x2;
+        pi2_out_tmp[2] = x0 - x1;
+        pi2_out_tmp[3] = x3 - (x2 * 2);
+
+        /* pointing to next row; */
+        pu1_src += i4_src_stride;
+        pu1_pred += i4_pred_stride;
+        pi2_out_tmp += 4;
+        pi2_upsampled_res += i4_upsampled_res_stride;
+    }
+    pi2_out_tmp = pi2_out;
+    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        /* Vertical transform and quantization of column i */
+        x4 = pi2_out_tmp[0];
+        x5 = pi2_out_tmp[4];
+        x6 = pi2_out_tmp[8];
+        x7 = pi2_out_tmp[12];
+
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        /* quantization is done in place */
+
+        i4_value = x0 + x1;
+
+        /* Column 0, row 0 is the DC term; hand it out before quantization */
+        if(i == 0)
+        {
+            *pi2_dc_out = i4_value;
+        }
+
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[0] = i4_value;
+
+        i4_value = (x3 * 2) + x2;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[4] = i4_value;
+
+        i4_value = x0 - x1;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[8] = i4_value;
+
+        i4_value = x3 - (x2 * 2);
+        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
+                  u4_qbits, u4_nonzero_coeff);
+        pi2_out_tmp[12] = i4_value;
+
+        /* Step to the next column of the output and of both matrices */
+        pi2_out_tmp++;
+        pu2_scale_matrix++;
+        pu2_threshold_matrix++;
+    }
+
+    /* Return total nonzero coefficients in the current sub block */
+    *pu1_nnz = u4_nonzero_coeff;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   This function performs forward hadamard transform and quantization on a 4*4
+ *   block
+ *
+ * @par Description:
+ *   The function reads 16 values from pi2_src as four rows of four, applies the
+ *   row (horizontal) stage into pi2_dst and then the column (vertical) stage on
+ *   pi2_dst in place. Each column-stage result is halved (>> 1) and passed
+ *   through FWD_QUANT using entry 0 of both the threshold and scale matrices.
+ *
+ * @param[in] pi2_src
+ *   Pointer to the 16 input values, stored contiguously
+ *
+ * @param[out] pi2_dst
+ *   Pointer to the 16 output values, stored contiguously; also holds the
+ *   row-stage intermediate
+ *
+ * @param[in] ps_quant_constants
+ *   Forward quant constants: pu2_scale_matrix, pu2_threshold_matrix, u4_qbits
+ *   and u4_round_factor
+ *
+ * @param[out] pu1_nnz
+ *   Reset to 0 here and then handed to every FWD_QUANT call as the non-zero
+ *   coefficient counter of the block
+ *
+ * @returns
+ *   None
+ *
+ * @remarks
+ *   None
+ *
+ */
+
+void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst,
+                             resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
+{
+    WORD32 i;
+    WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value;
+
+    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
+    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
+    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
+    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
+
+    *pu1_nnz = 0; /* counter passed to FWD_QUANT below */
+
+    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) /* horizontal stage, one row per iteration */
+    {
+        x4 = pi2_src[0];
+        x5 = pi2_src[1];
+        x6 = pi2_src[2];
+        x7 = pi2_src[3];
+
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        pi2_dst[0] = x0 + x1;
+        pi2_dst[1] = x3 + x2;
+        pi2_dst[2] = x0 - x1;
+        pi2_dst[3] = x3 - x2;
+
+        pi2_src += 4;
+        pi2_dst += 4;
+    }
+
+    /* Vertical transform and quantization */
+    pi2_dst -= SUB_BLK_WIDTH_4x4 << 2; /* rewind by what the row loop advanced */
+
+    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        x4 = pi2_dst[0];
+        x5 = pi2_dst[4];
+        x6 = pi2_dst[8];
+        x7 = pi2_dst[12];
+
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        i4_value = (x0 + x1) >> 1;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[0]);
+        pi2_dst[0] = i4_value;
+
+        i4_value = (x3 + x2) >> 1;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[0]);
+        pi2_dst[4] = i4_value;
+
+        i4_value = (x0 - x1) >> 1;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[0]);
+        pi2_dst[8] = i4_value;
+
+        i4_value = (x3 - x2) >> 1;
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[0]);
+        pi2_dst[12] = i4_value;
+
+        pi2_dst++; /* next column */
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   This function performs forward hadamard transform and quantization on a 2*2
+ *   block for both U and V planes
+ *
+ * @par Description:
+ *   The function processes two planes in turn. For each plane it reads four
+ *   consecutive values from pi2_src, applies the 2x2 transform and passes each
+ *   of the four results through FWD_QUANT using entry 0 of both the threshold
+ *   and scale matrices. Four values are written to pi2_dst per plane.
+ *
+ * @param[in] pi2_src
+ *   Pointer to 8 input values: 4 for the first plane followed by 4 for the
+ *   second
+ *
+ * @param[out] pi2_dst
+ *   Pointer to 8 output values, laid out per plane like pi2_src
+ *
+ * @param[in] ps_quant_constants
+ *   Forward quant constants: pu2_scale_matrix, pu2_threshold_matrix, u4_qbits
+ *   and u4_round_factor
+ *
+ * @param[out] pu1_nnz
+ *   Non-zero coefficient counters, one per plane
+ *
+ * @returns
+ *   None
+ *
+ * @remarks
+ *   pu1_nnz[0] is reset and updated for the first plane and pu1_nnz[1] for the
+ *   second plane
+ *
+ */
+
+void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst,
+                                resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
+{
+    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
+    WORD32 i4_value, plane;
+
+    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
+    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
+    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
+    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
+
+    for(plane = 0; plane < 2; plane++)
+    {
+        pu1_nnz[plane] = 0; /* counter passed to FWD_QUANT for this plane */
+
+        /* Horizontal transform */
+        x4 = pi2_src[0];
+        x5 = pi2_src[1];
+        x6 = pi2_src[2];
+        x7 = pi2_src[3];
+
+        x0 = x4 + x5;
+        x1 = x4 - x5;
+        x2 = x6 + x7;
+        x3 = x6 - x7;
+
+        /* Vertical transform and quantization; results are stored in the */
+        /* order pi2_dst[0], [2], [3], [1] */
+        i4_value = (x0 + x2);
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_dst[0] = i4_value;
+
+        i4_value = (x0 - x2);
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_dst[2] = i4_value;
+
+        i4_value = (x1 - x3);
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_dst[3] = i4_value;
+
+        i4_value = (x1 + x3);
+        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_dst[1] = i4_value;
+
+        pi2_dst += 4; /* move both pointers to the next plane's four values */
+        pi2_src += 4;
+    }
+}
+
+/*
+ *******************************************************************************
+ *
+ * @brief
+ *  This function performs Single stage forward transform CF8 and quantization
+ *  on 8*8 blocks for h.264
+ *
+ * @par Description:
+ *  Computes the residue (source minus prediction, optionally adjusted by the
+ *  upsampled residual through isvc_subtract_upsampled_res), performs the
+ *  single stage 8x8 forward transform CF8 on it and quantizes the result in
+ *  place with FWD_QUANT
+ *
+ * @param[in] ps_src
+ *  Source buffer (data pointer and stride); 8 consecutive samples are read
+ *  from each of 8 rows
+ *
+ * @param[in] ps_pred
+ *  Prediction buffer (data pointer and stride), read like ps_src
+ *
+ * @param[out] ps_out
+ *  Buffer receiving the 64 quantized coefficients, written contiguously with
+ *  8 values per row; its stride field is not read by this function
+ *
+ * @param[in] ps_upsampled_res
+ *  Upsampled residual buffer; may be NULL, in which case u1_use_upsampled_res
+ *  has to be 0 because the data pointer is dereferenced otherwise
+ *
+ * @param[in] ps_quant_constants
+ *  Forward quant constants: scale and threshold matrices (64 entries each are
+ *  indexed here), u4_qbits and u4_round_factor
+ *
+ * @param[out] pu1_nnz
+ *  Total non-zero coefficients in the current block
+ *
+ * @param[in] pi2_dc_out
+ *  Unused by this function
+ *
+ * @param[in] u1_use_upsampled_res
+ *  When non-zero, the upsampled residual is applied to every residue before
+ *  the transform
+ *
+ * @returns  Void
+ *
+ *
+ *******************************************************************************
+ */
+void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
+                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
+                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
+{
+    UWORD32 i;
+    WORD32 a0, a1, a2, a3, a4, a5, a6, a7;
+    WORD32 r0, r1, r2, r3, r4, r5, r6, r7;
+
+    UWORD8 *pu1_src = ps_src->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    WORD16 *pi2_out = ps_out->pv_data;
+    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
+    WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
+    WORD16 *pi2_out_tmp = pi2_out;
+    UWORD32 u4_nonzero_coeff = 0;
+    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
+    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
+    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
+    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
+
+    UNUSED(pi2_dc_out); /* accepted only so the signature matches FT_RESI_TRANS_QUANT */
+
+    /* Horizontal transform */
+    /* r0..r7 hold the residues of one row; a0..a7 are reused, first for the */
+    /* sums (even outputs) and then for the differences (odd outputs) */
+    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
+    {
+        r0 = pu1_src[0];
+        r0 -= pu1_pred[0];
+        r1 = pu1_src[1];
+        r1 -= pu1_pred[1];
+        r2 = pu1_src[2];
+        r2 -= pu1_pred[2];
+        r3 = pu1_src[3];
+        r3 -= pu1_pred[3];
+        r4 = pu1_src[4];
+        r4 -= pu1_pred[4];
+        r5 = pu1_src[5];
+        r5 -= pu1_pred[5];
+        r6 = pu1_src[6];
+        r6 -= pu1_pred[6];
+        r7 = pu1_src[7];
+        r7 -= pu1_pred[7];
+
+        if(u1_use_upsampled_res)
+        {
+            r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]);
+            r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]);
+            r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]);
+            r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]);
+            r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]);
+            r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]);
+            r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]);
+            r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]);
+        }
+
+        a0 = r0 + r7;
+        a1 = r1 + r6;
+        a2 = r2 + r5;
+        a3 = r3 + r4;
+
+        a4 = a0 + a3;
+        a5 = a1 + a2;
+        a6 = a0 - a3;
+        a7 = a1 - a2;
+
+        pi2_out_tmp[0] = a4 + a5;
+
+        pi2_out_tmp[2] = a6 + (a7 >> 1);
+        pi2_out_tmp[4] = a4 - a5;
+        pi2_out_tmp[6] = (a6 >> 1) - a7;
+
+        a0 = r0 - r7;
+        a1 = r1 - r6;
+        a2 = r2 - r5;
+        a3 = r3 - r4;
+
+        a4 = a1 + a2 + ((a0 >> 1) + a0);
+        a5 = a0 - a3 - ((a2 >> 1) + a2);
+        a6 = a0 + a3 - ((a1 >> 1) + a1);
+        a7 = a1 - a2 + ((a3 >> 1) + a3);
+
+        pi2_out_tmp[1] = a4 + (a7 >> 2);
+        pi2_out_tmp[3] = a5 + (a6 >> 2);
+        pi2_out_tmp[5] = a6 - (a5 >> 2);
+        pi2_out_tmp[7] = (a4 >> 2) - a7;
+
+        pu1_src += i4_src_stride;
+        pu1_pred += i4_pred_stride;
+        pi2_out_tmp += 8; /* output rows are packed, 8 coefficients apart */
+        pi2_upsampled_res += i4_upsampled_res_stride;
+    }
+
+    /* Vertical transform and quant, one column per iteration, done in place */
+
+    pi2_out_tmp = pi2_out;
+
+    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
+    {
+        r0 = pi2_out_tmp[0];
+        r1 = pi2_out_tmp[8];
+        r2 = pi2_out_tmp[16];
+        r3 = pi2_out_tmp[24];
+        r4 = pi2_out_tmp[32];
+        r5 = pi2_out_tmp[40];
+        r6 = pi2_out_tmp[48];
+        r7 = pi2_out_tmp[56];
+
+        a0 = r0 + r7;
+        a1 = r1 + r6;
+        a2 = r2 + r5;
+        a3 = r3 + r4;
+
+        a4 = a0 + a3;
+        a5 = a1 + a2;
+        a6 = a0 - a3;
+        a7 = a1 - a2;
+
+        a0 = r0 - r7;
+        a1 = r1 - r6;
+        a2 = r2 - r5;
+        a3 = r3 - r4;
+
+        r0 = a4 + a5;
+        r2 = a6 + (a7 >> 1);
+        r4 = a4 - a5;
+        r6 = (a6 >> 1) - a7;
+
+        a4 = a1 + a2 + ((a0 >> 1) + a0);
+        a5 = a0 - a3 - ((a2 >> 1) + a2);
+        a6 = a0 + a3 - ((a1 >> 1) + a1);
+        a7 = a1 - a2 + ((a3 >> 1) + a3);
+
+        r1 = a4 + (a7 >> 2);
+        r3 = a5 + (a6 >> 2);
+        r5 = a6 - (a5 >> 2);
+        r7 = (a4 >> 2) - a7;
+
+        FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[0] = r0;
+
+        FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[8] = r1;
+
+        FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[16] = r2;
+
+        FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[24] = r3;
+
+        FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[32] = r4;
+
+        FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[40] = r5;
+
+        FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[48] = r6;
+
+        FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[56] = r7;
+
+        pi2_out_tmp++; /* next column of the output and of both matrices */
+        pu2_scale_matrix++;
+        pu2_threshold_matrix++;
+    }
+    /* Return total nonzero coefficients in the current sub block */
+    *pu1_nnz = u4_nonzero_coeff;
+}
--- a/common/svc/isvc_structs.h
+++ b/common/svc/isvc_structs.h
@ -0,0 +1,335 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvc_structs.h
+*
+* @brief
+*  Contains struct definitions used for SVC
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVC_STRUCTS_H_
+#define _ISVC_STRUCTS_H_
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+#include "isvc_defs.h"
+
+typedef struct buffer_container_t /* Data pointer and stride describing one buffer */
+{
+    void *pv_data; /* Untyped base address; users assign it to a typed sample pointer */
+
+    WORD32 i4_data_stride; /* Row pitch; users such as isvc_resi_trans_quant_8x8 add it to the typed pointer */
+
+} buffer_container_t;
+
+typedef struct yuv_buf_props_t /* Component buffers plus format and size information */
+{
+    buffer_container_t as_component_bufs[NUM_COMPONENTS]; /* one container per component */
+
+    IV_COLOR_FORMAT_T e_color_format; /* colour format of the data held in as_component_bufs */
+
+    UWORD32 u4_width; /* presumably in pixels - TODO confirm against users */
+
+    UWORD32 u4_height; /* presumably in pixels - TODO confirm against users */
+
+    UWORD8 u1_bit_depth; /* presumably bits per sample - TODO confirm */
+} yuv_buf_props_t;
+
+typedef struct nal_unit_header_t /* Fields of a NAL unit header */
+{
+    UWORD8 u1_nal_ref_idc; /* presumably the nal_ref_idc syntax element - confirm */
+
+    UWORD8 u1_nal_unit_type; /* presumably the nal_unit_type syntax element - confirm */
+} nal_unit_header_t;
+
+typedef struct coordinates_t /* A 2-D coordinate pair */
+{
+    WORD32 i4_abscissa; /* horizontal (x) coordinate */
+
+    WORD32 i4_ordinate; /* vertical (y) coordinate */
+} coordinates_t;
+
+typedef struct svc_au_buf_t /* Buffers and picture-level bookkeeping of one access unit */
+{
+    /* Array of structs that contain properties of the layer buffers */
+    /* (presumably one entry per spatial layer - TODO confirm) */
+    yuv_buf_props_t *ps_layer_yuv_buf_props;
+
+    /* Temporal ID */
+    WORD8 i1_temporal_id;
+
+    /* Num Spatial Layers */
+    UWORD8 u1_num_spatial_layers;
+
+    /* Resolution ratio b/w spatial layers */
+    DOUBLE d_spatial_res_ratio;
+
+    /* absolute value of POC */
+    WORD32 i4_abs_poc;
+
+    /* POC % MaxPicOrderCntLSB */
+    WORD32 i4_poc_lsb;
+
+    /* Lower 32 bits of time stamp */
+    UWORD32 u4_timestamp_low;
+
+    /* Higher 32 bits of time stamp */
+    UWORD32 u4_timestamp_high;
+
+    /* Is Pic used as refPic for future frames? */
+    WORD32 i4_used_as_ref;
+
+    /* frame_num in the slice header */
+    WORD32 i4_frame_num;
+
+    /*
+     *  0: Top Field
+     *  1: Bottom Field
+     */
+    WORD8 i1_field_type;
+
+    /* buffer ID from frame buffer manager */
+    WORD32 i4_buf_id;
+
+} svc_au_buf_t;
+
+typedef struct svc_nalu_ext_t /* NAL unit header plus the SVC extension fields named below */
+{
+    nal_unit_header_t s_nalu_header; /* base NAL unit header fields */
+
+    /* idr_flag */
+    UWORD8 u1_idr_flag;
+
+    /* priority_id (Range = [0, 63]) */
+    UWORD8 u1_priority_id;
+
+    /* no_inter_layer_pred_flag */
+    UWORD8 u1_no_inter_layer_pred_flag;
+
+    /* dependency_id (Range = [0, 7]) */
+    UWORD8 u1_dependency_id;
+
+    /* quality_id (Range = [0, 15]) */
+    UWORD8 u1_quality_id;
+
+    /* temporal_id (Range = [0, 7]) */
+    UWORD8 u1_temporal_id;
+
+    /* use_ref_base_pic_flag */
+    UWORD8 u1_use_ref_base_pic_flag;
+
+    /* discardable_flag */
+    UWORD8 u1_discardable_flag;
+
+    /* output_flag */
+    UWORD8 u1_output_flag;
+
+    /* reserved_three_2bits */
+    UWORD8 u1_reserved_three_2bits;
+
+} svc_nalu_ext_t;
+
+typedef struct svc_vui_ext_t /* SVC VUI extension; arrays hold MAX_VUI_EXT_NUM_ENTRIES entries */
+{
+    /* specifies the maximum layers in the SVC bitstream */
+    UWORD32 u4_vui_ext_num_entries_minus1;
+
+    /* specifies the dependency ID for each layer */
+    UWORD8 u1_vui_ext_dependency_id[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the quality ID for each layer */
+    UWORD8 u1_vui_ext_quality_id[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the temporal ID for each layer */
+    UWORD8 u1_vui_ext_temporal_id[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the timing_info_present_flag value of the i-th sub-bitstream */
+    UWORD8 u1_vui_ext_timing_info_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the num_units_in_tick value of the i-th sub-bitstream */
+    UWORD32 u4_vui_ext_num_units_in_tick[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the time_scale value of the i-th sub-bitstream */
+    UWORD32 u4_vui_ext_time_scale[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the fixed_frame_rate_flag value of the i-th sub-bitstream */
+    UWORD8 u1_vui_ext_fixed_frame_rate_flag[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the nal_hrd_parameters_present_flag value of the i-th sub-bitstream */
+    UWORD8 u1_vui_ext_nal_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the vcl_hrd_parameters_present_flag value of the i-th sub-bitstream */
+    UWORD8 u1_vui_ext_vcl_hrd_params_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the low_delay_hrd_flag value of the i-th sub-bitstream */
+    UWORD8 u1_vui_ext_low_delay_hrd_flag[MAX_VUI_EXT_NUM_ENTRIES];
+
+    /* specifies the pic_struct_present_flag value of the i-th sub-bitstream */
+    UWORD8 u1_vui_ext_pic_struct_present_flag[MAX_VUI_EXT_NUM_ENTRIES];
+
+} svc_vui_ext_t;
+
+typedef struct sps_svc_ext_t /* SVC-specific SPS fields; each comment names the syntax element */
+{
+    /* inter_layer_deblocking_filter_control_present_flag */
+    UWORD8 u1_inter_layer_deblocking_filter_control_present_flag;
+
+    /* extended_spatial_scalability_idc */
+    UWORD8 u1_extended_spatial_scalability_idc;
+
+    /* chroma_phase_x_plus1_flag */
+    UWORD8 u1_chroma_phase_x_plus1;
+
+    /* chroma_phase_y_plus1 */
+    UWORD8 u1_chroma_phase_y_plus1;
+
+    /* seq_ref_layer_chroma_phase_x_plus1_flag */
+    UWORD8 u1_seq_ref_layer_chroma_phase_x_plus1_flag;
+
+    /* seq_ref_layer_chroma_phase_y_plus1 */
+    UWORD8 u1_seq_ref_layer_chroma_phase_y_plus1;
+
+    /* seq_scaled_ref_layer_left_offset */
+    WORD32 i4_seq_scaled_ref_layer_left_offset;
+
+    /* seq_scaled_ref_layer_top_offset */
+    WORD32 i4_seq_scaled_ref_layer_top_offset;
+
+    /* seq_scaled_ref_layer_right_offset */
+    WORD32 i4_seq_scaled_ref_layer_right_offset;
+
+    /* seq_scaled_ref_layer_bottom_offset */
+    WORD32 i4_seq_scaled_ref_layer_bottom_offset;
+
+    /* seq_tcoeff_level_prediction_flag */
+    WORD8 i1_seq_tcoeff_level_prediction_flag;
+
+    /* adaptive_tcoeff_level_prediction_flag */
+    WORD8 i1_adaptive_tcoeff_level_prediction_flag;
+
+    /* slice_header_restriction_flag */
+    WORD8 i1_slice_header_restriction_flag;
+
+} sps_svc_ext_t;
+
+typedef struct subset_sps_t /* An SPS together with its SVC extension data */
+{
+    /* SPS structure */
+    sps_t s_sps;
+
+    /* Structure containing flags specific to SVC SPS */
+    sps_svc_ext_t s_sps_svc_ext;
+
+    /* svc_vui_parameters_present_flag */
+    WORD8 i1_svc_vui_parameters_present_flag;
+
+    svc_vui_ext_t s_svc_vui; /* SVC VUI data; presumably meaningful only when the flag above is set - confirm */
+
+    /* additional_extension2_data_flag */
+    WORD8 i1_additional_extension2_flag;
+
+} subset_sps_t;
+
+typedef struct svc_slice_header_t /* SVC slice header fields followed by the base slice header */
+{
+    /* ref_layer_dq_id */
+    UWORD32 u4_ref_layer_dq_id;
+
+    /* disable_inter_layer_deblocking_filter_idc */
+    UWORD32 u4_disable_inter_layer_deblocking_filter_idc;
+
+    /* inter_layer_slice_alpha_c0_offset_div2 */
+    WORD32 i4_inter_layer_slice_alpha_c0_offset_div2;
+
+    /* inter_layer_slice_beta_offset_div2 */
+    WORD32 i4_inter_layer_slice_beta_offset_div2;
+
+    /* constrained_intra_resampling_flag */
+    WORD8 i1_constrained_intra_resampling_flag;
+
+    /* ref_layer_chroma_phase_x_plus1_flag */
+    WORD8 i1_ref_layer_chroma_phase_x_plus1_flag;
+
+    /* ref_layer_chroma_phase_y_plus1 */
+    WORD8 i1_ref_layer_chroma_phase_y_plus1;
+
+    /* scaled_ref_layer_left_offset */
+    WORD32 i4_scaled_ref_layer_left;
+
+    /* scaled_ref_layer_top_offset */
+    WORD32 i4_scaled_ref_layer_top;
+
+    /* scaled_ref_layer_right_offset */
+    WORD32 i4_scaled_ref_layer_right;
+
+    /* scaled_ref_layer_bottom_offset */
+    WORD32 i4_scaled_ref_layer_bottom;
+
+    /* slice_skip_flag */
+    WORD8 i1_slice_skip_flag;
+
+    /* num_mbs_in_slice_minus1 */
+    UWORD32 u4_num_mbs_in_slice_minus1;
+
+    /* adaptive_base_mode_flag */
+    WORD8 i1_adaptive_base_mode_flag;
+
+    /* default_base_mode_flag */
+    WORD8 i1_default_base_mode_flag;
+
+    /* adaptive_motion_prediction_flag */
+    WORD8 i1_adaptive_motion_prediction_flag;
+
+    /* default_motion_prediction_flag */
+    WORD8 i1_default_motion_prediction_flag;
+
+    /* adaptive_residual_prediction_flag */
+    WORD8 i1_adaptive_residual_prediction_flag;
+
+    /* default_residual_prediction_flag */
+    WORD8 i1_default_residual_prediction_flag;
+
+    /* tcoeff_level_prediction_flag */
+    WORD8 i1_tcoeff_level_prediction_flag;
+
+    /* scan_idx_start */
+    UWORD32 u4_scan_idx_start;
+
+    /* scan_idx_end */
+    UWORD32 u4_scan_idx_end;
+
+    WORD32 i4_store_ref_base_pic_flag; /* presumably store_ref_base_pic_flag - confirm */
+
+    slice_header_t s_slice_header; /* base (non-SVC) slice header fields */
+} svc_slice_header_t;
+
+#endif
--- a/common/svc/isvc_trans_quant_itrans_iquant.h
+++ b/common/svc/isvc_trans_quant_itrans_iquant.h
@ -0,0 +1,253 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_trans_quant_itrans_iquant.h
+ *
+ * @brief
+ *  Contains declarations for forward and inverse transform paths for H264
+ *
+ * @author
+ *  Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_
+#define _ISVC_TRANS_QUANT_ITRANS_IQUANT_H_
+
+#include <stdint.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "ih264_macros.h"
+#include "isvc_macros.h"
+#include "isvc_structs.h"
+
+/* With and without residual_pred use */
+#define NUM_RESI_TRANS_QUANT_VARIANTS 2
+
+#define NUM_IQ_IT_RECON_VARIANTS 3
+
+/* Structs */
+typedef struct resi_trans_quant_constants_t /* Constants the forward kernels pass to FWD_QUANT */
+{
+    const UWORD16 *pu2_scale_matrix; /* forward quant scale matrix */
+
+    const UWORD16 *pu2_threshold_matrix; /* forward quant threshold matrix */
+
+    UWORD32 u4_qbits; /* qbits argument of FWD_QUANT */
+
+    UWORD32 u4_round_factor; /* round factor argument of FWD_QUANT */
+} resi_trans_quant_constants_t;
+
+typedef struct iq_it_res_rec_constants_t /* Constants taken by the FT_IQ_IT_RECON kernels */
+{
+    const UWORD16 *pu2_iscal_mat; /* presumably the inverse scaling matrix - TODO confirm */
+
+    const UWORD16 *pu2_weigh_mat; /* presumably the weighting matrix - TODO confirm */
+
+    UWORD32 u4_qp_div_6; /* presumably QP / 6 - TODO confirm */
+} iq_it_res_rec_constants_t;
+
+/* Typedefs
+ * These are function types, not function pointers. The extern declarations
+ * further down use them to declare each kernel, e.g. FT_RESI_TRANS_QUANT for
+ * isvc_resi_trans_quant_4x4 / _chroma_4x4 / _8x8, FT_HADAMARD_QUANT for
+ * isvc_hadamard_quant_4x4 / _2x2_uv and FT_IQ_IT_RECON for the
+ * isvc_iquant_itrans_recon_* family.
+ */
+typedef void FT_RESI_TRANS_DCTRANS_QUANT(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out,
+                                         WORD32 src_strd, WORD32 pred_strd, WORD32 dst_strd,
+                                         const UWORD16 *pu2_scale_mat,
+                                         const UWORD16 *pu2_thresh_mat, UWORD32 u4_qbit,
+                                         UWORD32 u4_round_fact, UWORD8 *pu1_nnz);
+
+typedef void FT_IDCTRANS_IQUANT_ITRANS_RECON(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out,
+                                             WORD32 src_strd, WORD32 pred_strd, WORD32 out_strd,
+                                             const UWORD16 *pu2_iscale_mat,
+                                             const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
+                                             UWORD32 pi4_cntrl, WORD32 *pi4_tmp);
+
+typedef void FT_RESI_TRANS_QUANT(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                 buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
+                                 resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
+                                 WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res);
+
+typedef void FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+    WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
+    UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
+
+typedef void FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+    WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
+    UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz);
+
+typedef void FT_IQ_IT_RECON(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                            buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
+                            buffer_container_t *ps_rec,
+                            iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp,
+                            WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate);
+
+typedef void FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON(
+    WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+    WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
+    UWORD32 pi4_cntrl, UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
+
+typedef void FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON(
+    WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+    WORD32 out_strd, const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, UWORD32 qp_div,
+    UWORD32 pi4_cntrl, WORD32 *pi4_tmp);
+
+typedef void FT_IHADAMARD_SCALING(WORD16 *pi2_src, WORD16 *pi2_out, const UWORD16 *pu2_iscal_mat,
+                                  const UWORD16 *pu2_weigh_mat, UWORD32 u4_qp_div_6,
+                                  WORD32 *pi4_tmp);
+
+typedef void FT_HADAMARD_QUANT(WORD16 *pi2_src, WORD16 *pi2_dst,
+                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz);
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_8x8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc;
+extern FT_IQ_IT_RECON isvc_zcbf_iquant_itrans_recon_4x4;
+extern FT_IQ_IT_RECON isvc_chroma_zcbf_iquant_itrans_recon_4x4;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv;
+extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4;
+extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv;
+
+/* A9 Declarations */
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_a9;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_a9;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_a9;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_a9;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_a9;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_a9;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_a9;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_a9;
+extern FT_LUMA_16X16_RESI_TRANS_DCTRANS_QUANT isvc_luma_16x16_resi_trans_dctrans_quant_a9;
+extern FT_CHROMA_8X8_RESI_TRANS_DCTRANS_QUANT isvc_chroma_8x8_resi_trans_dctrans_quant_a9;
+extern FT_LUMA_16X16_IDCTRANS_IQUANT_ITRANS_RECON isvc_luma_16x16_idctrans_iquant_itrans_recon_a9;
+extern FT_CHROMA_8X8_IDCTRANS_IQUANT_ITRANS_RECON isvc_chroma_8x8_idctrans_iquant_itrans_recon_a9;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_a9;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_a9;
+extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_a9;
+extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_a9;
+
+/* Av8 Declarations */
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_av8;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_av8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_av8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_av8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_av8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_av8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_av8;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_av8;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_av8;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_av8;
+
+/* NEON Declarations */
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_neon;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_residual_sub_neon;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_neon;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
+
+/* SSSE3 Declarations */
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_ssse3;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_ssse3;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_ssse3;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_8x8_dc_ssse3;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_ssse3;
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_2x2_uv_ssse3;
+
+/* SSE4.2 Declarations */
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_sse42;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_4x4_with_res_pred_sse42;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_sse42;
+extern FT_RESI_TRANS_QUANT isvc_resi_trans_quant_chroma_4x4_with_res_pred_sse42;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_dc_4x4_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_4x4_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42;
+
+extern FT_IHADAMARD_SCALING ih264_ihadamard_scaling_4x4_sse42;
+
+extern FT_HADAMARD_QUANT isvc_hadamard_quant_4x4_sse42;
+extern FT_HADAMARD_QUANT isvc_hadamard_quant_2x2_uv_sse42;
+
+/* NEON Declarations (inverse quant, inverse transform and recon) */
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_output_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
+
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
+extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
+
+/* Returns the resi-trans-quant variant index for the given flag. */
+/* The flag value is passed through unchanged as the index.       */
+static FORCEINLINE UWORD8 isvc_get_resi_trans_quant_variant_idx(UWORD8 u1_use_upsampled_res)
+{
+    UWORD8 u1_variant_idx = u1_use_upsampled_res;
+
+    return u1_variant_idx;
+}
+
+/* Packs the two flags into a single variant index:               */
+/*     index = 2 * u1_is_intra + u1_res_accumulate                */
+/* The combination (intra == 1, res_accumulate == 1) is asserted  */
+/* against and must not be requested by callers.                  */
+static FORCEINLINE UWORD8 isvc_get_iq_it_recon_variant_idx(UWORD8 u1_is_intra,
+                                                           UWORD8 u1_res_accumulate)
+{
+    UWORD8 u1_variant_idx;
+
+    ASSERT(!((1 == u1_is_intra) && (1 == u1_res_accumulate)));
+
+    u1_variant_idx = (UWORD8) ((u1_is_intra << 1) + u1_res_accumulate);
+
+    return u1_variant_idx;
+}
+
+/* Returns the residue value clipped to [-UINT8_MAX, UINT8_MAX].  */
+/* When u1_res_accumulate is non-zero the residual prediction is  */
+/* added to the inverse-transform output before clipping;         */
+/* otherwise only the inverse-transform output is clipped.        */
+static FORCEINLINE WORD16 isvc_get_residue(WORD16 i2_it_out, WORD16 i2_res_pred,
+                                           UWORD8 u1_res_accumulate)
+{
+    if(u1_res_accumulate)
+    {
+        return CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_it_out + i2_res_pred);
+    }
+
+    return CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_it_out);
+}
+
+#endif
--- a/common/svccommon.cmake
+++ b/common/svccommon.cmake
@ -0,0 +1,39 @@
+# Platform-independent SVC sources, appended to the common source list
+list(
+  APPEND
+  LIBAVC_COMMON_SRCS
+  "${AVC_ROOT}/common/svc/isvc_common_tables.c"
+  "${AVC_ROOT}/common/svc/isvc_cabac_tables.c"
+  "${AVC_ROOT}/common/svc/isvc_intra_resample.c"
+  "${AVC_ROOT}/common/svc/isvc_iquant_itrans_recon.c"
+  "${AVC_ROOT}/common/svc/isvc_mem_fns.c"
+  "${AVC_ROOT}/common/svc/isvc_resi_trans_quant.c")
+
+include_directories(${AVC_ROOT}/common/svc)
+
+# Architecture-specific sources.
+# "aarch64" / "aarch32" select the NEON files; every other value of
+# CMAKE_SYSTEM_PROCESSOR falls through to the x86 (SSSE3 / SSE4.2) files.
+if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR
+   "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch32")
+  # NEON sources are C files, but are collected in the ASMS list
+  list(
+    APPEND
+    LIBAVC_COMMON_ASMS
+    "${AVC_ROOT}/common/arm/svc/isvc_intra_sampling_neon.c"
+    "${AVC_ROOT}/common/arm/svc/isvc_iquant_itrans_recon_neon.c"
+    "${AVC_ROOT}/common/arm/svc/isvc_mem_fns_neon.c"
+    "${AVC_ROOT}/common/arm/svc/isvc_resi_trans_quant_neon.c")
+  include_directories(${AVC_ROOT}/common/arm/svc)
+else()
+  # x86 sources go into the regular source list
+  list(
+    APPEND
+    LIBAVC_COMMON_SRCS
+    "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_sse42.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_mem_fns_sse42.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_mem_fns_ssse3.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_padding_ssse3.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_resi_trans_quant_sse42.c"
+    "${AVC_ROOT}/common/x86/svc/isvc_intra_resample_sse42.c")
+
+  include_directories(${AVC_ROOT}/common/x86/svc)
+endif()
--- a/common/x86/svc/isvc_intra_resample_sse42.c
+++ b/common/x86/svc/isvc_intra_resample_sse42.c
@ -0,0 +1,658 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/*!
+ **************************************************************************
+ * \file isvc_intra_resample_sse42.c
+ *
+ * \brief
+ *    Contains SSE4.2 routines that perform resampling for SVC
+ *
+ * Detailed_description
+ *
+ * \date
+ *
+ * \author
+ **************************************************************************
+ */
+#include <immintrin.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "isvc_intra_resample.h"
+
+/**
+ * Two-pass (vertical then horizontal) 4-tap interpolation of a luma block.
+ *
+ * Pass 1 (vertical) reads 12 rows of the input at a stride of DYADIC_REF_W_Y
+ * and writes 16 rows x 12 columns of 16-bit intermediates into
+ * pi2_tmp_filt_buf (row stride 12). Output rows alternate between two tap
+ * sets applied to four consecutive input rows s0..s3:
+ *     "phase 12":  -1*s0 +  8*s1 + 28*s2 - 3*s3
+ *     "phase 4" :  -3*s0 + 28*s1 +  8*s2 - 1*s3
+ * It is done in two column strips: columns 0..7, then columns 8..11.
+ *
+ * Pass 2 (horizontal) applies the same two tap sets along each intermediate
+ * row, interleaving "phase 12" and "phase 4" results, rounds with
+ * (x + 512) >> 10, saturates to 8 bits and writes 16 rows x 16 bytes to
+ * pu1_out_buf.
+ *
+ * @param pu1_inp_buf       Input samples; 12 rows are read, bytes 0..15 of each
+ * @param pi2_tmp_filt_buf  Scratch for intermediates; 16 * 12 WORD16 are used
+ * @param pu1_out_buf       Output samples; 16 rows of 16 bytes are written
+ * @param i4_out_stride     Distance in bytes between output rows
+ */
+void isvc_interpolate_base_luma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                             UWORD8 *pu1_out_buf, WORD32 i4_out_stride)
+{
+    WORD32 i4_y;
+    WORD32 i4_filt_stride, i4_src_stride;
+    UWORD8 *pu1_inp, *pu1_out;
+    WORD16 *pi2_tmp;
+
+    __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3;
+    __m128i i4_samp_8x16b_0, i4_samp_8x16b_1, i4_samp_8x16b_2, i4_samp_8x16b_3;
+    __m128i i4_res_8x16b_r1_1, i4_res_8x16b_r1_2, i4_res_8x16b_r1_3;
+    __m128i i4_res_8x16b_r2_1, i4_res_8x16b_r2_2, i4_res_8x16b_r2_3;
+
+    /* Filter coefficient values for phase 4 */
+    /* (the remaining taps, 8 and -1, are applied with a shift and a subtract) */
+    __m128i i4_coeff_8x16b_0 = _mm_set1_epi16(-3);
+    __m128i i4_coeff_8x16b_1 = _mm_set1_epi16(28);
+    i4_filt_stride = 12;
+    i4_src_stride = DYADIC_REF_W_Y;
+
+    /* Initializing pointers */
+    pu1_inp = pu1_inp_buf;
+    pi2_tmp = pi2_tmp_filt_buf;
+    pu1_out = pu1_out_buf;
+
+    /* Vertical interpolation */
+    /* First strip: columns 0..7 (64 bits of input per row) */
+    /* y = 0, y_phase = 12 */
+    /* Load the first four input rows and widen them to 16 bit */
+    i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp));
+    i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride));
+    i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1)));
+    i4_samp_16x8b_3 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
+    pu1_inp += (i4_src_stride << 2);
+    i4_samp_8x16b_0 = _mm_cvtepu8_epi16(i4_samp_16x8b_0);
+    i4_samp_8x16b_1 = _mm_cvtepu8_epi16(i4_samp_16x8b_1);
+    i4_samp_8x16b_2 = _mm_cvtepu8_epi16(i4_samp_16x8b_2);
+    i4_samp_8x16b_3 = _mm_cvtepu8_epi16(i4_samp_16x8b_3);
+
+    /* since y_phase 12 for y = 0 */
+    /* result = 8*s1 + 28*s2 - 3*s3 - s0 */
+    /* Multiply by 8 => left shift by 3 */
+    i4_res_8x16b_r1_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
+    i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
+    i4_res_8x16b_r1_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
+
+    i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
+    i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_0);
+    i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
+
+    _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
+    pi2_tmp += i4_filt_stride;
+
+    /* Each iteration slides the 4-row window down by one input row and */
+    /* produces two output rows (7 iterations => rows 1..14)            */
+    for(i4_y = 1; i4_y < 15; i4_y += 2)
+    {
+        i4_samp_8x16b_0 = i4_samp_8x16b_1;
+        i4_samp_8x16b_1 = i4_samp_8x16b_2;
+        i4_samp_8x16b_2 = i4_samp_8x16b_3;
+        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
+
+        /* y_phase is 4 for odd values of y */
+        /* and 12 for even values of y */
+        /* r1 (odd y)  = -3*s0 + 28*s1 + 8*s2 - s3 */
+        /* r2 (even y) = -s0 + 8*s1 + 28*s2 - 3*s3 */
+        /* Multiply by 8 => left shift by 3 */
+        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
+        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
+        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
+
+        i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
+        i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
+        i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
+
+        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
+        i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
+
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
+        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
+
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
+        i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
+
+        /* Storing the results */
+        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
+        _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
+        pi2_tmp += (i4_filt_stride << 1);
+        pu1_inp += i4_src_stride;
+
+        } /* End of loop over y */
+
+        /* y = 15, y_phase = 4 */
+        /* Last output row of the strip; consumes the 12th input row */
+        i4_samp_8x16b_0 = i4_samp_8x16b_1;
+        i4_samp_8x16b_1 = i4_samp_8x16b_2;
+        i4_samp_8x16b_2 = i4_samp_8x16b_3;
+        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
+
+        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
+        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
+        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
+        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
+
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
+
+        /* Store the output */
+        _mm_storeu_si128((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
+
+        /* Reinitializing the ptrs */
+        pu1_inp = pu1_inp_buf;
+        pi2_tmp = pi2_tmp_filt_buf;
+
+    /* Second strip: columns 8..11. Eight input bytes are loaded per row, */
+    /* but only the low four 16-bit results are stored (storel_epi64)     */
+    pu1_inp += 8;
+    pi2_tmp += 8;
+
+        /* y = 0, y_phase = 12 */
+        i4_samp_16x8b_0 = _mm_loadl_epi64((__m128i *) (pu1_inp));
+        i4_samp_16x8b_1 = _mm_loadl_epi64((__m128i *) (pu1_inp + i4_src_stride));
+        i4_samp_16x8b_2 = _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1)));
+        i4_samp_16x8b_3 =
+            _mm_loadl_epi64((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
+        pu1_inp += (i4_src_stride << 2);
+        i4_samp_8x16b_0 = _mm_cvtepu8_epi16(i4_samp_16x8b_0);
+        i4_samp_8x16b_1 = _mm_cvtepu8_epi16(i4_samp_16x8b_1);
+        i4_samp_8x16b_2 = _mm_cvtepu8_epi16(i4_samp_16x8b_2);
+        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(i4_samp_16x8b_3);
+
+        /* since y_phase 12 for y = 0 */
+        /* Multiply by 8 => left shift by 3 */
+        i4_res_8x16b_r1_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
+        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
+        i4_res_8x16b_r1_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
+
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
+        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_0);
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
+
+        _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
+        pi2_tmp += i4_filt_stride;
+
+        /* Same sliding-window scheme as the first strip */
+        for(i4_y = 1; i4_y < 15; i4_y += 2)
+        {
+            i4_samp_8x16b_0 = i4_samp_8x16b_1;
+            i4_samp_8x16b_1 = i4_samp_8x16b_2;
+            i4_samp_8x16b_2 = i4_samp_8x16b_3;
+            i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
+
+            /* y_phase is 4 for odd values of y */
+            /* and 12 for even values of y */
+            /* Multiply by 8 => left shift by 3 */
+            i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
+            i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
+            i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
+
+            i4_res_8x16b_r2_1 = _mm_slli_epi16(i4_samp_8x16b_1, 3);
+            i4_res_8x16b_r2_2 = _mm_mullo_epi16(i4_samp_8x16b_2, i4_coeff_8x16b_1);
+            i4_res_8x16b_r2_3 = _mm_mullo_epi16(i4_samp_8x16b_3, i4_coeff_8x16b_0);
+
+            i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
+            i4_res_8x16b_r2_3 = _mm_subs_epi16(i4_res_8x16b_r2_3, i4_samp_8x16b_0);
+
+            i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
+            i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_2);
+
+            i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
+            i4_res_8x16b_r2_1 = _mm_adds_epi16(i4_res_8x16b_r2_1, i4_res_8x16b_r2_3);
+
+            /* Storing the results */
+            _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
+            _mm_storel_epi64((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r2_1);
+            pi2_tmp += (i4_filt_stride << 1);
+            pu1_inp += i4_src_stride;
+
+        } /* End of loop over y */
+
+        /* y = 15, y_phase = 4 */
+        i4_samp_8x16b_0 = i4_samp_8x16b_1;
+        i4_samp_8x16b_1 = i4_samp_8x16b_2;
+        i4_samp_8x16b_2 = i4_samp_8x16b_3;
+        i4_samp_8x16b_3 = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *) (pu1_inp)));
+
+        i4_res_8x16b_r1_1 = _mm_mullo_epi16(i4_samp_8x16b_0, i4_coeff_8x16b_0);
+        i4_res_8x16b_r1_2 = _mm_mullo_epi16(i4_samp_8x16b_1, i4_coeff_8x16b_1);
+        i4_res_8x16b_r1_3 = _mm_slli_epi16(i4_samp_8x16b_2, 3);
+        i4_res_8x16b_r1_3 = _mm_subs_epi16(i4_res_8x16b_r1_3, i4_samp_8x16b_3);
+
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_2);
+        i4_res_8x16b_r1_1 = _mm_adds_epi16(i4_res_8x16b_r1_1, i4_res_8x16b_r1_3);
+
+        /* Store the output */
+        _mm_storel_epi64((__m128i *) pi2_tmp, i4_res_8x16b_r1_1);
+
+        /* Reinitializing the ptrs */
+        pu1_inp = pu1_inp_buf;
+        pi2_tmp = pi2_tmp_filt_buf;
+
+    {
+        /* Tap pairs for _mm_madd_epi16; _mm_set_epi16 lists the highest lane */
+        /* first, so e.g. coeff_c0_c1 holds (-3, 28) in lanes (0, 1)          */
+        __m128i coeff_c0_c1_8x16b = _mm_set_epi16(28, -3, 28, -3, 28, -3, 28, -3);
+        __m128i coeff_c2_c3_8x16b = _mm_set_epi16(-1, 8, -1, 8, -1, 8, -1, 8);
+        __m128i coeff_c3_c2_8x16b = _mm_set_epi16(8, -1, 8, -1, 8, -1, 8, -1);
+        __m128i coeff_c1_c0_8x16b = _mm_set_epi16(-3, 28, -3, 28, -3, 28, -3, 28);
+
+        __m128i i4_samp_8x16b_rpart1_0, i4_samp_8x16b_rpart2_0;
+        __m128i i4_samp_8x16b_rpart1_1, i4_samp_8x16b_rpart2_1;
+        __m128i i4_samp_8x16b_rpart1_2, i4_samp_8x16b_rpart2_2;
+        __m128i i4_samp_8x16b_rpart1_3, i4_samp_8x16b_rpart2_3;
+        __m128i i4_samp_8x16b_rpart1_4, i4_samp_8x16b_rpart2_4;
+
+        __m128i i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart2_0;
+        __m128i i4_res_4x32b_rpart1_1, i4_res_4x32b_rpart2_1;
+        __m128i i4_res_4x32b_rpart1_2, i4_res_4x32b_rpart2_2;
+        __m128i i4_res_4x32b_rpart1_3, i4_res_4x32b_rpart2_3;
+
+        /* Rounding offset for the final >> 10 */
+        __m128i res_512 = _mm_set1_epi32(512);
+        /* Horizontal interpolation */
+        for(i4_y = 0; i4_y < 16; i4_y++)
+        {
+            /* part1 covers intermediates t0..t7, part2 covers t4..t11; */
+            /* each part yields 8 of the 16 output pixels of this row   */
+            i4_samp_8x16b_rpart1_0 = _mm_loadu_si128((__m128i *) pi2_tmp);
+            i4_samp_8x16b_rpart2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 4));
+
+            /* Copies of the row shifted left by 1..4 samples */
+            i4_samp_8x16b_rpart1_1 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 2);
+            i4_samp_8x16b_rpart1_2 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 4);
+            i4_samp_8x16b_rpart1_3 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 6);
+            i4_samp_8x16b_rpart1_4 = _mm_srli_si128(i4_samp_8x16b_rpart1_0, 8);
+
+            i4_samp_8x16b_rpart2_1 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 2);
+            i4_samp_8x16b_rpart2_2 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 4);
+            i4_samp_8x16b_rpart2_3 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 6);
+            i4_samp_8x16b_rpart2_4 = _mm_srli_si128(i4_samp_8x16b_rpart2_0, 8);
+
+            /* Interleave neighbouring samples so that each 32-bit lane */
+            /* holds the pair (t[k], t[k+1]) expected by madd           */
+            i4_samp_8x16b_rpart1_0 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_0, i4_samp_8x16b_rpart1_1);
+            i4_samp_8x16b_rpart1_1 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_1, i4_samp_8x16b_rpart1_2);
+            i4_samp_8x16b_rpart1_2 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_2, i4_samp_8x16b_rpart1_3);
+            i4_samp_8x16b_rpart1_3 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart1_3, i4_samp_8x16b_rpart1_4);
+
+            i4_samp_8x16b_rpart2_0 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_0, i4_samp_8x16b_rpart2_1);
+            i4_samp_8x16b_rpart2_1 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_1, i4_samp_8x16b_rpart2_2);
+            i4_samp_8x16b_rpart2_2 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_2, i4_samp_8x16b_rpart2_3);
+            i4_samp_8x16b_rpart2_3 =
+                _mm_unpacklo_epi16(i4_samp_8x16b_rpart2_3, i4_samp_8x16b_rpart2_4);
+
+            /* _0 + _2 : -t[k] + 8*t[k+1] + 28*t[k+2] - 3*t[k+3]      */
+            i4_res_4x32b_rpart1_0 = _mm_madd_epi16(i4_samp_8x16b_rpart1_0, coeff_c3_c2_8x16b);
+            i4_res_4x32b_rpart1_2 = _mm_madd_epi16(i4_samp_8x16b_rpart1_2, coeff_c1_c0_8x16b);
+
+            /* _1 + _3 : -3*t[k+1] + 28*t[k+2] + 8*t[k+3] - t[k+4]    */
+            i4_res_4x32b_rpart1_1 = _mm_madd_epi16(i4_samp_8x16b_rpart1_1, coeff_c0_c1_8x16b);
+            i4_res_4x32b_rpart1_3 = _mm_madd_epi16(i4_samp_8x16b_rpart1_3, coeff_c2_c3_8x16b);
+
+            i4_res_4x32b_rpart2_0 = _mm_madd_epi16(i4_samp_8x16b_rpart2_0, coeff_c3_c2_8x16b);
+            i4_res_4x32b_rpart2_2 = _mm_madd_epi16(i4_samp_8x16b_rpart2_2, coeff_c1_c0_8x16b);
+
+            i4_res_4x32b_rpart2_1 = _mm_madd_epi16(i4_samp_8x16b_rpart2_1, coeff_c0_c1_8x16b);
+            i4_res_4x32b_rpart2_3 = _mm_madd_epi16(i4_samp_8x16b_rpart2_3, coeff_c2_c3_8x16b);
+
+            i4_res_4x32b_rpart1_0 = _mm_add_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_2);
+            i4_res_4x32b_rpart1_1 = _mm_add_epi32(i4_res_4x32b_rpart1_1, i4_res_4x32b_rpart1_3);
+
+            i4_res_4x32b_rpart2_0 = _mm_add_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_2);
+            i4_res_4x32b_rpart2_1 = _mm_add_epi32(i4_res_4x32b_rpart2_1, i4_res_4x32b_rpart2_3);
+
+            /* Interleave the two result sets into output pixel order */
+            i4_res_4x32b_rpart1_2 =
+                _mm_unpacklo_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1);
+            i4_res_4x32b_rpart1_3 =
+                _mm_unpackhi_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1);
+
+            i4_res_4x32b_rpart2_2 =
+                _mm_unpacklo_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1);
+            i4_res_4x32b_rpart2_3 =
+                _mm_unpackhi_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1);
+
+            /* Round and normalise: (x + 512) >> 10 */
+            i4_res_4x32b_rpart1_0 = _mm_add_epi32(i4_res_4x32b_rpart1_2, res_512);
+            i4_res_4x32b_rpart1_1 = _mm_add_epi32(i4_res_4x32b_rpart1_3, res_512);
+
+            i4_res_4x32b_rpart1_0 = _mm_srai_epi32(i4_res_4x32b_rpart1_0, 10);
+            i4_res_4x32b_rpart1_1 = _mm_srai_epi32(i4_res_4x32b_rpart1_1, 10);
+
+            i4_res_4x32b_rpart2_0 = _mm_add_epi32(i4_res_4x32b_rpart2_2, res_512);
+            i4_res_4x32b_rpart2_1 = _mm_add_epi32(i4_res_4x32b_rpart2_3, res_512);
+
+            i4_res_4x32b_rpart2_0 = _mm_srai_epi32(i4_res_4x32b_rpart2_0, 10);
+            i4_res_4x32b_rpart2_1 = _mm_srai_epi32(i4_res_4x32b_rpart2_1, 10);
+
+            /* Pack 32 -> 16 -> 8 bit with unsigned saturation and write */
+            /* the 16 output pixels of this row                          */
+            _mm_storeu_si128(
+                (__m128i *) pu1_out,
+                _mm_packus_epi16(_mm_packus_epi32(i4_res_4x32b_rpart1_0, i4_res_4x32b_rpart1_1),
+                                 _mm_packus_epi32(i4_res_4x32b_rpart2_0, i4_res_4x32b_rpart2_1)));
+
+            pi2_tmp += i4_filt_stride;
+            pu1_out += i4_out_stride;
+
+        } /* End of loop over y */
+    }
+}
+
+/**
+ * Vertical 2-tap interpolation of a chroma block.
+ *
+ * Reads 6 input rows (stride DYADIC_REF_W_C) and writes 8 rows x 6 columns of
+ * 16-bit intermediates to pi2_tmp_filt_buf (row stride 6, 48 values in all).
+ * With in[k] the k-th input row, the output rows are:
+ *     row 2k     = (16 - i4_phase_0) * in[k]     + i4_phase_0 * in[k + 1]
+ *     row 2k + 1 = (16 - i4_phase_1) * in[k + 1] + i4_phase_1 * in[k + 2]
+ * for k = 0..3. No rounding or normalisation is applied here.
+ *
+ * @param pu1_inp_buf       Input samples; 16 bytes are loaded from each of the
+ *                          6 rows, of which only the low 8 are used
+ * @param pi2_tmp_filt_buf  Output intermediates; 8 * 6 WORD16 are written
+ * @param i4_phase_0        Phase used for even output rows
+ * @param i4_phase_1        Phase used for odd output rows
+ *
+ * NOTE(review): the 16-byte loads read past the 8 bytes actually consumed per
+ * row - confirm the input buffer leaves that much readable after row 5.
+ */
+void isvc_vert_interpol_chroma_dyadic_sse42(UWORD8 *pu1_inp_buf, WORD16 *pi2_tmp_filt_buf,
+                                            WORD32 i4_phase_0, WORD32 i4_phase_1)
+{
+    WORD8 i4_coeff_0, i4_coeff_1, i4_coeff_2, i4_coeff_3;
+    WORD32 i4_filt_stride, i4_src_stride;
+    UWORD8 *pu1_inp;
+    WORD16 *pi2_tmp;
+    __m128i i4_samp_16x8b_0, i4_samp_16x8b_1, i4_samp_16x8b_2, i4_samp_16x8b_3, i4_samp_16x8b_4,
+        i4_samp_16x8b_5;
+    __m128i i4_res_8x16b_r0, i4_res_8x16b_r1, i4_res_8x16b_r2, i4_res_8x16b_r3, i4_res_8x16b_r4,
+        i4_res_8x16b_r5, i4_res_8x16b_r6, i4_res_8x16b_r7;
+    __m128i i4_res_8x16b_r7_temp;
+    __m128i i4_c0_c1_16x8b, i4_c2_c3_16x8b;
+
+    /* Two-tap weights: (16 - phase) for the upper row, phase for the lower */
+    i4_coeff_0 = (WORD8) (16 - i4_phase_0);
+    i4_coeff_1 = (WORD8) (i4_phase_0);
+    i4_coeff_2 = (WORD8) (16 - i4_phase_1);
+    i4_coeff_3 = (WORD8) (i4_phase_1);
+
+    /* Byte pairs (coeff_0, coeff_1) / (coeff_2, coeff_3) replicated across the  */
+    /* register; _mm_set_epi8 lists the highest byte first, so byte 0 is coeff_0 */
+    i4_c0_c1_16x8b =
+        _mm_set_epi8(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
+                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
+                     i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0);
+    i4_c2_c3_16x8b =
+        _mm_set_epi8(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
+                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
+                     i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2);
+
+    /* Initializing pointers */
+    pu1_inp = pu1_inp_buf;
+    pi2_tmp = pi2_tmp_filt_buf;
+    i4_filt_stride = 6;
+    i4_src_stride = DYADIC_REF_W_C;
+
+    /* Load the six input rows */
+    i4_samp_16x8b_0 = _mm_loadu_si128((__m128i *) (pu1_inp));
+    i4_samp_16x8b_1 = _mm_loadu_si128((__m128i *) (pu1_inp + i4_src_stride));
+    i4_samp_16x8b_2 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1)));
+    i4_samp_16x8b_3 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 1) + i4_src_stride));
+    i4_samp_16x8b_4 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2)));
+    i4_samp_16x8b_5 = _mm_loadu_si128((__m128i *) (pu1_inp + (i4_src_stride << 2) + i4_src_stride));
+
+    /* Each step interleaves two adjacent rows byte-wise and lets maddubs form */
+    /* upper * c_even + lower * c_odd for 8 columns. Rows 0..5 are written     */
+    /* with full 16-byte stores; the two extra values spill into the next row  */
+    /* and are overwritten by that row's own store.                            */
+
+    /* Row 0: in0, in1 with phase_0 weights */
+    i4_samp_16x8b_0 = _mm_unpacklo_epi8(i4_samp_16x8b_0, i4_samp_16x8b_1);
+    i4_res_8x16b_r0 = _mm_maddubs_epi16(i4_samp_16x8b_0, i4_c0_c1_16x8b);
+    _mm_storeu_si128((__m128i *) (pi2_tmp), i4_res_8x16b_r0);
+
+    /* Row 1: in1, in2 with phase_1 weights */
+    i4_samp_16x8b_1 = _mm_unpacklo_epi8(i4_samp_16x8b_1, i4_samp_16x8b_2);
+    i4_res_8x16b_r1 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c2_c3_16x8b);
+    _mm_storeu_si128((__m128i *) (pi2_tmp + i4_filt_stride), i4_res_8x16b_r1);
+
+    /* Row 2: in1, in2 with phase_0 weights */
+    i4_res_8x16b_r2 = _mm_maddubs_epi16(i4_samp_16x8b_1, i4_c0_c1_16x8b);
+    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1)), i4_res_8x16b_r2);
+
+    /* Row 3: in2, in3 with phase_1 weights */
+    i4_samp_16x8b_2 = _mm_unpacklo_epi8(i4_samp_16x8b_2, i4_samp_16x8b_3);
+    i4_res_8x16b_r3 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c2_c3_16x8b);
+    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 1) + i4_filt_stride),
+                     i4_res_8x16b_r3);
+
+    /* Row 4: in2, in3 with phase_0 weights */
+    i4_res_8x16b_r4 = _mm_maddubs_epi16(i4_samp_16x8b_2, i4_c0_c1_16x8b);
+    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2)), i4_res_8x16b_r4);
+
+    /* Row 5: in3, in4 with phase_1 weights */
+    i4_samp_16x8b_3 = _mm_unpacklo_epi8(i4_samp_16x8b_3, i4_samp_16x8b_4);
+    i4_res_8x16b_r5 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c2_c3_16x8b);
+    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + i4_filt_stride),
+                     i4_res_8x16b_r5);
+
+    /* Row 6: in3, in4 with phase_0 weights; only columns 0..3 are stored here */
+    i4_res_8x16b_r6 = _mm_maddubs_epi16(i4_samp_16x8b_3, i4_c0_c1_16x8b);
+    _mm_storel_epi64((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1)),
+                     i4_res_8x16b_r6);
+
+    /* 78 (0x4E) swaps the 64-bit halves: row 6 columns 4,5 move to lanes 0,1 */
+    i4_res_8x16b_r6 = _mm_shuffle_epi32(i4_res_8x16b_r6, 78);
+
+    i4_samp_16x8b_4 = _mm_unpacklo_epi8(i4_samp_16x8b_4, i4_samp_16x8b_5);
+
+    /* Row 7: in4, in5 with phase_1 weights */
+    i4_res_8x16b_r7 = _mm_maddubs_epi16(i4_samp_16x8b_4, i4_c2_c3_16x8b);
+
+    /* 147 (0x93) rotates the dwords up by one: row 7 columns 0..5 move to */
+    /* 16-bit lanes 2..7                                                   */
+    i4_res_8x16b_r7 = _mm_shuffle_epi32(i4_res_8x16b_r7, 147);
+
+    /* 252 (0xFC): lanes 0,1 from row 6 (columns 4,5), lanes 2..7 from row 7 */
+    i4_res_8x16b_r7_temp = _mm_blend_epi16(i4_res_8x16b_r6, i4_res_8x16b_r7, 252);
+
+    /* Final store covers elements 40..47, so writes end exactly at 8 * 6 */
+    _mm_storeu_si128((__m128i *) (pi2_tmp + (i4_filt_stride << 2) + (i4_filt_stride << 1) + 4),
+                     i4_res_8x16b_r7_temp);
+}
+
+void isvc_horz_interpol_chroma_dyadic_sse42(WORD16 *pi2_tmp_filt_buf, UWORD8 *pu1_out_buf,
+                                            WORD32 i4_out_stride, WORD32 i4_phase_0,
+                                            WORD32 i4_phase_1)
+{
+    WORD32 i4_dst_stride, i4_dst_stride2, i4_dst_stride4;
+    UWORD8 *pu1_out;
+    WORD16 *pi2_tmp;
+
+    __m128i i4_samp_8x16b_r1_0, i4_samp_8x16b_r1_1, i4_samp_8x16b_r1_2;
+    __m128i i4_samp_8x16b_r2_0, i4_samp_8x16b_r2_1, i4_samp_8x16b_r2_2;
+    __m128i i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1, i4_samp_8x16b_r3_2;
+    __m128i i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1, i4_samp_8x16b_r4_2;
+    __m128i i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1, i4_samp_8x16b_r5_2;
+    __m128i i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1, i4_samp_8x16b_r6_2;
+    __m128i i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1, i4_samp_8x16b_r7_2;
+    __m128i i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1, i4_samp_8x16b_r8_2;
+
+    __m128i i4_res_4x32b_r1_0, i4_res_4x32b_r1_1;
+    __m128i i4_res_4x32b_r2_0, i4_res_4x32b_r2_1;
+    __m128i i4_res_4x32b_r3_0, i4_res_4x32b_r3_1;
+    __m128i i4_res_4x32b_r4_0, i4_res_4x32b_r4_1;
+    __m128i i4_res_4x32b_r5_0, i4_res_4x32b_r5_1;
+    __m128i i4_res_4x32b_r6_0, i4_res_4x32b_r6_1;
+    __m128i i4_res_4x32b_r7_0, i4_res_4x32b_r7_1;
+    __m128i i4_res_4x32b_r8_0, i4_res_4x32b_r8_1;
+
+    __m128i i4_res_final_8x16b_r1, i4_res_final_8x16b_r2, i4_res_final_8x16b_r3,
+        i4_res_final_8x16b_r4, i4_res_final_8x16b_r5, i4_res_final_8x16b_r6, i4_res_final_8x16b_r7,
+        i4_res_final_8x16b_r8;
+
+    __m128i out_16x8b_r1, out_16x8b_r2, out_16x8b_r3, out_16x8b_r4, out_16x8b_r5, out_16x8b_r6,
+        out_16x8b_r7, out_16x8b_r8;
+
+    __m128i i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1;
+    __m128i i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1;
+    __m128i i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1;
+    __m128i i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1;
+    __m128i chroma_mask, chroma_mask2;
+
+    WORD32 i4_coeff_0 = 16 - i4_phase_0;
+    WORD32 i4_coeff_1 = i4_phase_0;
+    WORD32 i4_coeff_2 = 16 - i4_phase_1;
+    WORD32 i4_coeff_3 = i4_phase_1;
+    __m128i coeff_c0_c1_8x16b = _mm_set_epi16(i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0,
+                                              i4_coeff_1, i4_coeff_0, i4_coeff_1, i4_coeff_0);
+    __m128i coeff_c2_c3_8x16b = _mm_set_epi16(i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2,
+                                              i4_coeff_3, i4_coeff_2, i4_coeff_3, i4_coeff_2);
+    __m128i res_128 = _mm_set1_epi32(128);
+    UWORD32 u4_norm_factor = 8;
+
+    /* Initializing pointers */
+    pu1_out = pu1_out_buf;
+    pi2_tmp = pi2_tmp_filt_buf;
+    i4_dst_stride = i4_out_stride;
+
+    i4_dst_stride2 = i4_dst_stride << 1;
+    i4_dst_stride4 = i4_dst_stride << 2;
+
+    /* Horizontal interpolation */
+    i4_samp_8x16b_r1_0 = _mm_loadu_si128((__m128i *) pi2_tmp);
+    i4_samp_8x16b_r2_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 6));
+    i4_samp_8x16b_r3_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 12));
+    i4_samp_8x16b_r4_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 18));
+    i4_samp_8x16b_r5_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 24));
+    i4_samp_8x16b_r6_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 30));
+    i4_samp_8x16b_r7_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 36));
+    i4_samp_8x16b_r8_0 = _mm_loadu_si128((__m128i *) (pi2_tmp + 42));
+
+    i4_samp_8x16b_r1_1 = _mm_srli_si128(i4_samp_8x16b_r1_0, 2);
+    i4_samp_8x16b_r1_2 = _mm_srli_si128(i4_samp_8x16b_r1_0, 4);
+
+    i4_samp_8x16b_r2_1 = _mm_srli_si128(i4_samp_8x16b_r2_0, 2);
+    i4_samp_8x16b_r2_2 = _mm_srli_si128(i4_samp_8x16b_r2_0, 4);
+
+    i4_samp_8x16b_r3_1 = _mm_srli_si128(i4_samp_8x16b_r3_0, 2);
+    i4_samp_8x16b_r3_2 = _mm_srli_si128(i4_samp_8x16b_r3_0, 4);
+
+    i4_samp_8x16b_r4_1 = _mm_srli_si128(i4_samp_8x16b_r4_0, 2);
+    i4_samp_8x16b_r4_2 = _mm_srli_si128(i4_samp_8x16b_r4_0, 4);
+
+    i4_samp_8x16b_r5_1 = _mm_srli_si128(i4_samp_8x16b_r5_0, 2);
+    i4_samp_8x16b_r5_2 = _mm_srli_si128(i4_samp_8x16b_r5_0, 4);
+
+    i4_samp_8x16b_r6_1 = _mm_srli_si128(i4_samp_8x16b_r6_0, 2);
+    i4_samp_8x16b_r6_2 = _mm_srli_si128(i4_samp_8x16b_r6_0, 4);
+
+    i4_samp_8x16b_r7_1 = _mm_srli_si128(i4_samp_8x16b_r7_0, 2);
+    i4_samp_8x16b_r7_2 = _mm_srli_si128(i4_samp_8x16b_r7_0, 4);
+
+    i4_samp_8x16b_r8_1 = _mm_srli_si128(i4_samp_8x16b_r8_0, 2);
+    i4_samp_8x16b_r8_2 = _mm_srli_si128(i4_samp_8x16b_r8_0, 4);
+
+    i4_samp_8x16b_r1_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_0, i4_samp_8x16b_r1_1);
+    i4_samp_8x16b_r2_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r2_0, i4_samp_8x16b_r2_1);
+    i4_samp_8x16b_r3_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_0, i4_samp_8x16b_r3_1);
+    i4_samp_8x16b_r4_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_0, i4_samp_8x16b_r4_1);
+    i4_samp_8x16b_r5_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_0, i4_samp_8x16b_r5_1);
+    i4_samp_8x16b_r6_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_0, i4_samp_8x16b_r6_1);
+    i4_samp_8x16b_r7_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_0, i4_samp_8x16b_r7_1);
+    i4_samp_8x16b_r8_0 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_0, i4_samp_8x16b_r8_1);
+
+    i4_samp_8x16b_r1_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r1_1, i4_samp_8x16b_r1_2);
+    i4_samp_8x16b_r2_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r2_1, i4_samp_8x16b_r2_2);
+    i4_samp_8x16b_r3_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r3_1, i4_samp_8x16b_r3_2);
+    i4_samp_8x16b_r4_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r4_1, i4_samp_8x16b_r4_2);
+    i4_samp_8x16b_r5_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r5_1, i4_samp_8x16b_r5_2);
+    i4_samp_8x16b_r6_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r6_1, i4_samp_8x16b_r6_2);
+    i4_samp_8x16b_r7_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r7_1, i4_samp_8x16b_r7_2);
+    i4_samp_8x16b_r8_1 = _mm_unpacklo_epi16(i4_samp_8x16b_r8_1, i4_samp_8x16b_r8_2);
+
+    // a0c0 + a1c1  a1c0 + a2c1  a2c0 + a3c1  a3c0 + a4c1
+    i4_res_4x32b_r1_0 = _mm_madd_epi16(i4_samp_8x16b_r1_0, coeff_c0_c1_8x16b);
+    // b0c0+b1c1  b1c0+b2c1  b2c0+b3c1  b3c0+b4c1
+    i4_res_4x32b_r2_0 = _mm_madd_epi16(i4_samp_8x16b_r2_0, coeff_c0_c1_8x16b);
+    i4_res_4x32b_r3_0 = _mm_madd_epi16(i4_samp_8x16b_r3_0, coeff_c0_c1_8x16b);
+    i4_res_4x32b_r4_0 = _mm_madd_epi16(i4_samp_8x16b_r4_0, coeff_c0_c1_8x16b);
+    i4_res_4x32b_r5_0 = _mm_madd_epi16(i4_samp_8x16b_r5_0, coeff_c0_c1_8x16b);
+    i4_res_4x32b_r6_0 = _mm_madd_epi16(i4_samp_8x16b_r6_0, coeff_c0_c1_8x16b);
+    i4_res_4x32b_r7_0 = _mm_madd_epi16(i4_samp_8x16b_r7_0, coeff_c0_c1_8x16b);
+    i4_res_4x32b_r8_0 = _mm_madd_epi16(i4_samp_8x16b_r8_0, coeff_c0_c1_8x16b);
+
+    // a1c2+a2c3  a2c2+a3c3  a3c2+a4c3  a4c2+a5c3
+    i4_res_4x32b_r1_1 = _mm_madd_epi16(i4_samp_8x16b_r1_1, coeff_c2_c3_8x16b);
+    // b1c2+b2c3  b2c2+b3c3  b3c2+b4c3  b4c2+b5c3
+    i4_res_4x32b_r2_1 = _mm_madd_epi16(i4_samp_8x16b_r2_1, coeff_c2_c3_8x16b);
+    i4_res_4x32b_r3_1 = _mm_madd_epi16(i4_samp_8x16b_r3_1, coeff_c2_c3_8x16b);
+    i4_res_4x32b_r4_1 = _mm_madd_epi16(i4_samp_8x16b_r4_1, coeff_c2_c3_8x16b);
+    i4_res_4x32b_r5_1 = _mm_madd_epi16(i4_samp_8x16b_r5_1, coeff_c2_c3_8x16b);
+    i4_res_4x32b_r6_1 = _mm_madd_epi16(i4_samp_8x16b_r6_1, coeff_c2_c3_8x16b);
+    i4_res_4x32b_r7_1 = _mm_madd_epi16(i4_samp_8x16b_r7_1, coeff_c2_c3_8x16b);
+    i4_res_4x32b_r8_1 = _mm_madd_epi16(i4_samp_8x16b_r8_1, coeff_c2_c3_8x16b);
+
+    i4_res_4x32b_r1_0 = _mm_add_epi32(i4_res_4x32b_r1_0, res_128);
+    i4_res_4x32b_r2_0 = _mm_add_epi32(i4_res_4x32b_r2_0, res_128);
+    i4_res_4x32b_r3_0 = _mm_add_epi32(i4_res_4x32b_r3_0, res_128);
+    i4_res_4x32b_r4_0 = _mm_add_epi32(i4_res_4x32b_r4_0, res_128);
+    i4_res_4x32b_r5_0 = _mm_add_epi32(i4_res_4x32b_r5_0, res_128);
+    i4_res_4x32b_r6_0 = _mm_add_epi32(i4_res_4x32b_r6_0, res_128);
+    i4_res_4x32b_r7_0 = _mm_add_epi32(i4_res_4x32b_r7_0, res_128);
+    i4_res_4x32b_r8_0 = _mm_add_epi32(i4_res_4x32b_r8_0, res_128);
+
+    i4_res_4x32b_r1_1 = _mm_add_epi32(i4_res_4x32b_r1_1, res_128);
+    i4_res_4x32b_r2_1 = _mm_add_epi32(i4_res_4x32b_r2_1, res_128);
+    i4_res_4x32b_r3_1 = _mm_add_epi32(i4_res_4x32b_r3_1, res_128);
+    i4_res_4x32b_r4_1 = _mm_add_epi32(i4_res_4x32b_r4_1, res_128);
+    i4_res_4x32b_r5_1 = _mm_add_epi32(i4_res_4x32b_r5_1, res_128);
+    i4_res_4x32b_r6_1 = _mm_add_epi32(i4_res_4x32b_r6_1, res_128);
+    i4_res_4x32b_r7_1 = _mm_add_epi32(i4_res_4x32b_r7_1, res_128);
+    i4_res_4x32b_r8_1 = _mm_add_epi32(i4_res_4x32b_r8_1, res_128);
+
+    i4_res_4x32b_r1_0 = _mm_srai_epi32(i4_res_4x32b_r1_0, u4_norm_factor);
+    i4_res_4x32b_r2_0 = _mm_srai_epi32(i4_res_4x32b_r2_0, u4_norm_factor);
+    i4_res_4x32b_r3_0 = _mm_srai_epi32(i4_res_4x32b_r3_0, u4_norm_factor);
+    i4_res_4x32b_r4_0 = _mm_srai_epi32(i4_res_4x32b_r4_0, u4_norm_factor);
+    i4_res_4x32b_r5_0 = _mm_srai_epi32(i4_res_4x32b_r5_0, u4_norm_factor);
+    i4_res_4x32b_r6_0 = _mm_srai_epi32(i4_res_4x32b_r6_0, u4_norm_factor);
+    i4_res_4x32b_r7_0 = _mm_srai_epi32(i4_res_4x32b_r7_0, u4_norm_factor);
+    i4_res_4x32b_r8_0 = _mm_srai_epi32(i4_res_4x32b_r8_0, u4_norm_factor);
+
+    i4_res_4x32b_r1_1 = _mm_srai_epi32(i4_res_4x32b_r1_1, u4_norm_factor);
+    i4_res_4x32b_r2_1 = _mm_srai_epi32(i4_res_4x32b_r2_1, u4_norm_factor);
+    i4_res_4x32b_r3_1 = _mm_srai_epi32(i4_res_4x32b_r3_1, u4_norm_factor);
+    i4_res_4x32b_r4_1 = _mm_srai_epi32(i4_res_4x32b_r4_1, u4_norm_factor);
+    i4_res_4x32b_r5_1 = _mm_srai_epi32(i4_res_4x32b_r5_1, u4_norm_factor);
+    i4_res_4x32b_r6_1 = _mm_srai_epi32(i4_res_4x32b_r6_1, u4_norm_factor);
+    i4_res_4x32b_r7_1 = _mm_srai_epi32(i4_res_4x32b_r7_1, u4_norm_factor);
+    i4_res_4x32b_r8_1 = _mm_srai_epi32(i4_res_4x32b_r8_1, u4_norm_factor);
+
+    i4_res_final_8x16b_r12_0 = _mm_packs_epi32(i4_res_4x32b_r1_0, i4_res_4x32b_r2_0);
+    i4_res_final_8x16b_r34_0 = _mm_packs_epi32(i4_res_4x32b_r3_0, i4_res_4x32b_r4_0);
+    i4_res_final_8x16b_r56_0 = _mm_packs_epi32(i4_res_4x32b_r5_0, i4_res_4x32b_r6_0);
+    i4_res_final_8x16b_r67_0 = _mm_packs_epi32(i4_res_4x32b_r7_0, i4_res_4x32b_r8_0);
+
+    i4_res_final_8x16b_r12_1 = _mm_packs_epi32(i4_res_4x32b_r1_1, i4_res_4x32b_r2_1);
+    i4_res_final_8x16b_r34_1 = _mm_packs_epi32(i4_res_4x32b_r3_1, i4_res_4x32b_r4_1);
+    i4_res_final_8x16b_r56_1 = _mm_packs_epi32(i4_res_4x32b_r5_1, i4_res_4x32b_r6_1);
+    i4_res_final_8x16b_r67_1 = _mm_packs_epi32(i4_res_4x32b_r7_1, i4_res_4x32b_r8_1);
+
+    i4_res_final_8x16b_r1 = _mm_unpacklo_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1);
+    i4_res_final_8x16b_r2 = _mm_unpackhi_epi16(i4_res_final_8x16b_r12_0, i4_res_final_8x16b_r12_1);
+    i4_res_final_8x16b_r3 = _mm_unpacklo_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1);
+    i4_res_final_8x16b_r4 = _mm_unpackhi_epi16(i4_res_final_8x16b_r34_0, i4_res_final_8x16b_r34_1);
+    i4_res_final_8x16b_r5 = _mm_unpacklo_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1);
+    i4_res_final_8x16b_r6 = _mm_unpackhi_epi16(i4_res_final_8x16b_r56_0, i4_res_final_8x16b_r56_1);
+    i4_res_final_8x16b_r7 = _mm_unpacklo_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1);
+    i4_res_final_8x16b_r8 = _mm_unpackhi_epi16(i4_res_final_8x16b_r67_0, i4_res_final_8x16b_r67_1);
+
+    chroma_mask = _mm_set1_epi16(0xFF00);
+    chroma_mask2 = _mm_set1_epi16(0x00FF);
+    out_16x8b_r1 = _mm_loadu_si128((__m128i *) (&pu1_out[0]));
+    out_16x8b_r2 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride]));
+    out_16x8b_r3 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2]));
+    out_16x8b_r4 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride2 + i4_dst_stride]));
+    out_16x8b_r5 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4]));
+    out_16x8b_r6 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride]));
+    out_16x8b_r7 = _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2]));
+    out_16x8b_r8 =
+        _mm_loadu_si128((__m128i *) (&pu1_out[i4_dst_stride4 + i4_dst_stride2 + i4_dst_stride]));
+
+    out_16x8b_r1 = _mm_and_si128(out_16x8b_r1, chroma_mask);
+    out_16x8b_r2 = _mm_and_si128(out_16x8b_r2, chroma_mask);
+    out_16x8b_r3 = _mm_and_si128(out_16x8b_r3, chroma_mask);
+    out_16x8b_r4 = _mm_and_si128(out_16x8b_r4, chroma_mask);
+    out_16x8b_r5 = _mm_and_si128(out_16x8b_r5, chroma_mask);
+    out_16x8b_r6 = _mm_and_si128(out_16x8b_r6, chroma_mask);
+    out_16x8b_r7 = _mm_and_si128(out_16x8b_r7, chroma_mask);
+    out_16x8b_r8 = _mm_and_si128(out_16x8b_r8, chroma_mask);
+
+    i4_res_final_8x16b_r1 = _mm_and_si128(i4_res_final_8x16b_r1, chroma_mask2);
+    i4_res_final_8x16b_r2 = _mm_and_si128(i4_res_final_8x16b_r2, chroma_mask2);
+    i4_res_final_8x16b_r3 = _mm_and_si128(i4_res_final_8x16b_r3, chroma_mask2);
+    i4_res_final_8x16b_r4 = _mm_and_si128(i4_res_final_8x16b_r4, chroma_mask2);
+    i4_res_final_8x16b_r5 = _mm_and_si128(i4_res_final_8x16b_r5, chroma_mask2);
+    i4_res_final_8x16b_r6 = _mm_and_si128(i4_res_final_8x16b_r6, chroma_mask2);
+    i4_res_final_8x16b_r7 = _mm_and_si128(i4_res_final_8x16b_r7, chroma_mask2);
+    i4_res_final_8x16b_r8 = _mm_and_si128(i4_res_final_8x16b_r8, chroma_mask2);
+
+    out_16x8b_r1 = _mm_add_epi8(i4_res_final_8x16b_r1, out_16x8b_r1);
+    out_16x8b_r2 = _mm_add_epi8(i4_res_final_8x16b_r2, out_16x8b_r2);
+    out_16x8b_r3 = _mm_add_epi8(i4_res_final_8x16b_r3, out_16x8b_r3);
+    out_16x8b_r4 = _mm_add_epi8(i4_res_final_8x16b_r4, out_16x8b_r4);
+    out_16x8b_r5 = _mm_add_epi8(i4_res_final_8x16b_r5, out_16x8b_r5);
+    out_16x8b_r6 = _mm_add_epi8(i4_res_final_8x16b_r6, out_16x8b_r6);
+    out_16x8b_r7 = _mm_add_epi8(i4_res_final_8x16b_r7, out_16x8b_r7);
+    out_16x8b_r8 = _mm_add_epi8(i4_res_final_8x16b_r8, out_16x8b_r8);
+
+    _mm_storeu_si128((__m128i *) pu1_out, out_16x8b_r1);
+    _mm_storeu_si128((__m128i *) (pu1_out + i4_dst_stride), out_16x8b_r2);
+    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 2)), out_16x8b_r3);
+    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 3)), out_16x8b_r4);
+    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 4)), out_16x8b_r5);
+    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 5)), out_16x8b_r6);
+    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 6)), out_16x8b_r7);
+    _mm_storeu_si128((__m128i *) (pu1_out + (i4_dst_stride * 7)), out_16x8b_r8);
+}
--- a/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c
+++ b/common/x86/svc/isvc_iquant_itrans_recon_dc_ssse3.c
@ -0,0 +1,548 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_iquant_itrans_recon_dc_ssse3.c
+ *
+ * @brief
+ *  Contains function definitions for inverse quantization, inverse
+ *  transform and reconstruction
+ *
+ * @author
+ *  Mohit [100664]
+ *
+ * @par List of Functions:
+ *  - isvc_iquant_itrans_recon_4x4_dc_ssse3()
+ *  - isvc_iquant_itrans_recon_8x8_dc_ssse3()
+ *  - isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#include <immintrin.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Reconstructs a 4x4 block whose residue is described by a single DC value
+ *  (SSE intrinsics). For the complete (non DC-only) function, refer
+ *  isvc_iquant_itrans_recon_ssse3.c
+ *
+ * @par Description:
+ *  Element 0 of the source coefficients is inverse quantized with INV_QUANT.
+ *  When i4_iq_start_idx is non-zero that result is discarded and
+ *  pi2_dc_src[0] is used instead. The value is rounded as (q0 + 32) >> 6,
+ *  added to the first four samples of four prediction rows, and the sums are
+ *  saturated to unsigned 8 bit before being written to the recon buffer.
+ *
+ * @param[in] ps_src
+ *  Coefficient buffer; pv_data is read as WORD16 and only element 0 is used
+ *
+ * @param[in] ps_pred
+ *  Prediction buffer (pv_data as UWORD8, i4_data_stride as row pitch).
+ *  Each of the four rows is fetched with an 8 byte load, of which only the
+ *  first 4 bytes contribute to the result
+ *
+ * @param[in] ps_res_pred
+ *  Not used by this function
+ *
+ * @param[in] ps_res
+ *  Not used by this function
+ *
+ * @param[out] ps_rec
+ *  Recon buffer (pv_data as UWORD8, i4_data_stride as row pitch); four rows
+ *  of 4 bytes are written
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Supplies pu2_iscal_mat[0], pu2_weigh_mat[0] and u4_qp_div_6 for INV_QUANT
+ *
+ * @param[in] pi2_tmp
+ *  Not used by this function
+ *
+ * @param[in] pi2_dc_src
+ *  Element 0 replaces the dequantized value when i4_iq_start_idx != 0
+ *
+ * @param[in] i4_iq_start_idx
+ *  Non-zero selects pi2_dc_src[0] as the DC value (intra case, as per the
+ *  inline comment of the original implementation)
+ *
+ * @param[in] u1_res_accumulate
+ *  Not used by this function
+ *
+ * @returns none
+ *
+ * @remarks
+ *  Residue accumulation is not implemented; ASSERT(0) is executed before the
+ *  reconstruction. What that does depends on how ASSERT is defined in the
+ *  build.
+ *
+ *******************************************************************************
+ */
+void isvc_iquant_itrans_recon_4x4_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                           buffer_container_t *ps_res_pred,
+                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    WORD16 *pi2_src = ps_src->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    WORD32 q0 = pi2_src[0];
+    WORD16 rnd_fact = 0;
+    WORD16 i2_dc_resi;
+    WORD32 i4_row;
+
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i dc_resi_8x16b;
+    __m128i a_pred_8x16b[4];
+    __m128i rows01_8x16b, rows23_8x16b;
+    __m128i recon_16x8b;
+
+    if(u4_qp_div_6 < 4)
+    {
+        rnd_fact = 1 << (3 - u4_qp_div_6);
+    }
+
+    UNUSED(ps_res_pred);
+    UNUSED(ps_res);
+    UNUSED(pi2_tmp);
+    UNUSED(u1_res_accumulate);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+
+    /* Restoring dc value for intra case */
+    if(i4_iq_start_idx != 0)
+    {
+        q0 = pi2_dc_src[0];
+    }
+
+    /* The narrowing to WORD16 happens here, before the value is broadcast */
+    i2_dc_resi = ((q0 + 32) >> 6);
+    dc_resi_8x16b = _mm_set1_epi16(i2_dc_resi);
+
+    /* Fetch the four prediction rows and widen every byte to 16 bit */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        __m128i pred_16x8b = _mm_loadl_epi64((__m128i *) (pu1_pred + i4_row * i4_pred_stride));
+
+        a_pred_8x16b[i4_row] = _mm_unpacklo_epi8(pred_16x8b, zero_8x16b);
+    }
+
+    /* Two rows per register: low 64 bits of each operand (4 samples per row) */
+    rows01_8x16b = _mm_unpacklo_epi64(a_pred_8x16b[0], a_pred_8x16b[1]);
+    rows23_8x16b = _mm_unpacklo_epi64(a_pred_8x16b[2], a_pred_8x16b[3]);
+
+    rows01_8x16b = _mm_add_epi16(dc_resi_8x16b, rows01_8x16b);
+    rows23_8x16b = _mm_add_epi16(dc_resi_8x16b, rows23_8x16b);
+
+    /* Zero every lane that is not strictly positive, then saturate to 8 bit */
+    rows01_8x16b = _mm_and_si128(rows01_8x16b, _mm_cmpgt_epi16(rows01_8x16b, zero_8x16b));
+    rows23_8x16b = _mm_and_si128(rows23_8x16b, _mm_cmpgt_epi16(rows23_8x16b, zero_8x16b));
+
+    recon_16x8b = _mm_packus_epi16(rows01_8x16b, rows23_8x16b);
+
+    /* Each row occupies 4 bytes of the packed register; write the low 4 */
+    /* bytes and shift the next row down                                 */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        *((UWORD32 *) (pu1_out + i4_row * i4_out_stride)) = _mm_cvtsi128_si32(recon_16x8b);
+        recon_16x8b = _mm_srli_si128(recon_16x8b, 4);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Reconstructs an 8x8 block whose residue is described by a single DC value
+ *  (SSE intrinsics). For the complete (non DC-only) function, refer
+ *  isvc_iquant_itrans_recon_ssse3.c
+ *
+ * @par Description:
+ *  Element 0 of the source coefficients is inverse quantized with INV_QUANT,
+ *  rounded as (q0 + 32) >> 6 and added to every sample of the 8x8 prediction
+ *  block. The sums are saturated to unsigned 8 bit and written to the recon
+ *  buffer.
+ *
+ * @param[in] ps_src
+ *  Coefficient buffer; pv_data is read as WORD16 and only element 0 is used
+ *
+ * @param[in] ps_pred
+ *  Prediction buffer (pv_data as UWORD8, i4_data_stride as row pitch);
+ *  eight rows of 8 bytes are read
+ *
+ * @param[in] ps_res_pred
+ *  Not used by this function
+ *
+ * @param[in] ps_res
+ *  Not used by this function
+ *
+ * @param[out] ps_rec
+ *  Recon buffer (pv_data as UWORD8, i4_data_stride as row pitch); eight rows
+ *  of 8 bytes are written
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Supplies pu2_iscal_mat[0], pu2_weigh_mat[0] and u4_qp_div_6 for INV_QUANT
+ *
+ * @param[in] pi2_tmp
+ *  Not used by this function
+ *
+ * @param[in] pi2_dc_src
+ *  Not used by this function
+ *
+ * @param[in] i4_iq_start_idx
+ *  Not used by this function
+ *
+ * @param[in] u1_res_accumulate
+ *  Not used by this function
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *  Residue accumulation is not implemented; ASSERT(0) is executed before the
+ *  reconstruction. What that does depends on how ASSERT is defined in the
+ *  build.
+ *
+ *******************************************************************************
+ */
+void isvc_iquant_itrans_recon_8x8_dc_ssse3(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                           buffer_container_t *ps_res_pred,
+                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
+    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
+    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
+    WORD16 *pi2_src = ps_src->pv_data;
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    WORD32 q0 = pi2_src[0];
+    WORD16 rnd_fact = 0;
+    WORD16 i2_dc_resi;
+    WORD32 i4_row;
+
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i dc_resi_8x16b;
+    __m128i a_row_8x16b[8];
+
+    if(u4_qp_div_6 < 6)
+    {
+        rnd_fact = 1 << (5 - u4_qp_div_6);
+    }
+
+    UNUSED(ps_res_pred);
+    UNUSED(ps_res);
+    UNUSED(pi2_tmp);
+    UNUSED(pi2_dc_src);
+    UNUSED(i4_iq_start_idx);
+    UNUSED(u1_res_accumulate);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 6);
+
+    /* The narrowing to WORD16 happens here, before the value is broadcast */
+    i2_dc_resi = ((q0 + 32) >> 6);
+    dc_resi_8x16b = _mm_set1_epi16(i2_dc_resi);
+
+    /* Read all eight prediction rows first (8 samples each, widened to 16 bit) */
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        __m128i pred_16x8b = _mm_loadl_epi64((__m128i *) (pu1_pred + i4_row * i4_pred_stride));
+
+        a_row_8x16b[i4_row] = _mm_unpacklo_epi8(pred_16x8b, zero_8x16b);
+    }
+
+    /* Add the DC residue, clip, and write each row only after every row was read */
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        __m128i sum_8x16b = _mm_add_epi16(dc_resi_8x16b, a_row_8x16b[i4_row]);
+
+        /* Zero every lane that is not strictly positive, then saturate to 8 bit */
+        sum_8x16b = _mm_and_si128(sum_8x16b, _mm_cmpgt_epi16(sum_8x16b, zero_8x16b));
+        sum_8x16b = _mm_packus_epi16(sum_8x16b, zero_8x16b);
+
+        _mm_storel_epi64((__m128i *) (pu1_out + i4_row * i4_out_stride), sum_8x16b);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Reconstructs a 4x4 chroma block from a DC value and the prediction buffer
+ *  (SSE intrinsics)
+ *
+ * @par Description:
+ *  pi2_dc_src[0] is used as is (DC value won't be dequantized for chroma
+ *  inverse transform), rounded as (q0 + 32) >> 6 and added to the samples
+ *  held in the even bytes of four 8 byte prediction rows. The sums are
+ *  saturated to unsigned 8 bit and merged into the even bytes of four 8 byte
+ *  recon rows; the odd bytes of those recon rows are read back and written
+ *  unchanged. NOTE(review): this matches an interleaved two-plane chroma
+ *  layout - confirm against the callers.
+ *
+ * @param[in] ps_src
+ *  Not used by this function
+ *
+ * @param[in] ps_pred
+ *  Prediction buffer (pv_data as UWORD8, i4_data_stride as row pitch);
+ *  four rows of 8 bytes are read
+ *
+ * @param[in] ps_res_pred
+ *  Not used by this function
+ *
+ * @param[in] ps_res
+ *  Not used by this function
+ *
+ * @param[in,out] ps_rec
+ *  Recon buffer (pv_data as UWORD8, i4_data_stride as row pitch); four rows
+ *  of 8 bytes are read, merged and written back
+ *
+ * @param[in] ps_iq_it_res_rec_constants
+ *  Not used by this function
+ *
+ * @param[in] pi2_tmp
+ *  Not used by this function
+ *
+ * @param[in] pi2_dc_src
+ *  Element 0 is the DC value
+ *
+ * @param[in] i4_iq_start_idx
+ *  Not used by this function
+ *
+ * @param[in] u1_res_accumulate
+ *  Not used by this function
+ *
+ * @returns none
+ *
+ * @remarks
+ *  Residue accumulation is not implemented; ASSERT(0) is executed before the
+ *  reconstruction. What that does depends on how ASSERT is defined in the
+ *  build.
+ *
+ *******************************************************************************
+ */
+void isvc_iquant_itrans_recon_chroma_4x4_dc_ssse3(
+    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
+    buffer_container_t *ps_res, buffer_container_t *ps_rec,
+    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+{
+    UWORD8 *pu1_pred = ps_pred->pv_data;
+    UWORD8 *pu1_out = ps_rec->pv_data;
+    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    WORD32 i4_out_stride = ps_rec->i4_data_stride;
+    /* DC value won't be dequantized for chroma inverse transform */
+    WORD16 q0 = pi2_dc_src[0];
+    WORD16 i2_dc_resi = ((q0 + 32) >> 6);
+    WORD32 i4_row;
+
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    const __m128i even_byte_mask = _mm_set1_epi16(0xFF);
+    const __m128i odd_byte_mask = _mm_set1_epi16(0xFF00);
+    const __m128i dc_resi_8x16b = _mm_set1_epi16(i2_dc_resi);
+    __m128i a_pred_8x16b[4];
+    __m128i a_recon_8x16b[4];
+    __m128i a_out_16x8b[4];
+    __m128i rows01_8x16b, rows23_8x16b;
+    __m128i packed_16x8b;
+
+    UNUSED(ps_src);
+    UNUSED(ps_res_pred);
+    UNUSED(ps_res);
+    UNUSED(ps_iq_it_res_rec_constants);
+    UNUSED(pi2_tmp);
+    UNUSED(i4_iq_start_idx);
+    UNUSED(u1_res_accumulate);
+
+    /* Implement residue accumulation */
+    ASSERT(0);
+
+    /* Load 8 bytes per prediction row and keep only the even bytes; each */
+    /* kept byte then sits in the low half of a 16 bit lane               */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        __m128i pred_16x8b = _mm_loadl_epi64((__m128i *) (pu1_pred + i4_row * i4_pred_stride));
+
+        a_pred_8x16b[i4_row] = _mm_and_si128(pred_16x8b, even_byte_mask);
+    }
+
+    /* Two rows per register (4 samples per row) */
+    rows01_8x16b = _mm_unpacklo_epi64(a_pred_8x16b[0], a_pred_8x16b[1]);
+    rows23_8x16b = _mm_unpacklo_epi64(a_pred_8x16b[2], a_pred_8x16b[3]);
+
+    rows01_8x16b = _mm_add_epi16(dc_resi_8x16b, rows01_8x16b);
+    rows23_8x16b = _mm_add_epi16(dc_resi_8x16b, rows23_8x16b);
+
+    /* Zero every lane that is not strictly positive, then saturate to 8 bit */
+    rows01_8x16b = _mm_and_si128(rows01_8x16b, _mm_cmpgt_epi16(rows01_8x16b, zero_8x16b));
+    rows23_8x16b = _mm_and_si128(rows23_8x16b, _mm_cmpgt_epi16(rows23_8x16b, zero_8x16b));
+
+    packed_16x8b = _mm_packus_epi16(rows01_8x16b, rows23_8x16b);
+
+    /* Peel off one row (4 bytes) at a time and spread it back onto the even */
+    /* byte positions of an 8 byte output row                               */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        a_recon_8x16b[i4_row] = _mm_unpacklo_epi8(packed_16x8b, zero_8x16b);
+        packed_16x8b = _mm_srli_si128(packed_16x8b, 4);
+    }
+
+    /* Read every output row before any of them is written, keeping only the */
+    /* odd bytes and filling the even bytes with the reconstructed samples   */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        __m128i out_16x8b = _mm_loadl_epi64((__m128i *) (pu1_out + i4_row * i4_out_stride));
+
+        out_16x8b = _mm_and_si128(out_16x8b, odd_byte_mask);
+        a_out_16x8b[i4_row] = _mm_add_epi8(out_16x8b, a_recon_8x16b[i4_row]);
+    }
+
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        _mm_storel_epi64((__m128i *) (pu1_out + i4_row * i4_out_stride), a_out_16x8b[i4_row]);
+    }
+}
--- a/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
+++ b/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
--- a/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c
+++ b/common/x86/svc/isvc_iquant_itrans_recon_ssse3.c
--- a/common/x86/svc/isvc_mem_fns_sse42.c
+++ b/common/x86/svc/isvc_mem_fns_sse42.c
@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_mem_fns_sse42.c
+ *
+ * @brief
+ *  SSE4.2 variants of functions used for memory operations
+ *
+ *******************************************************************************
+ */
+#include <string.h>
+#include <immintrin.h>
+
+#include "ih264_typedefs.h"
+#include "isvc_mem_fns.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Fills a 2D block of bytes with a constant value
+ *
+ * @par Description:
+ *  4x4 blocks are written with 32-bit stores, 8x8 blocks with 64-bit stores
+ *  and blocks whose width is a multiple of 16 with unaligned 128-bit stores.
+ *  Every other size is filled row by row with memset(). Nothing is written
+ *  when either dimension is zero or negative.
+ *
+ * @param[out] pu1_dst
+ *  Pointer to the top-left byte of the block to fill
+ *
+ * @param[in] i4_dst_stride
+ *  Distance in bytes between the starts of consecutive rows
+ *
+ * @param[in] u1_val
+ *  Byte value written to every position of the block
+ *
+ * @param[in] i4_blk_wd
+ *  Block width in bytes
+ *
+ * @param[in] i4_blk_ht
+ *  Block height in rows
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvc_memset_2d_sse42(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32 i4_blk_wd,
+                          WORD32 i4_blk_ht)
+{
+    WORD32 i, j;
+    /* Broadcast once; every vector path below stores a prefix of this register */
+    const __m128i val_16x8b = _mm_set1_epi8(u1_val);
+
+    /* A negative width would reach memset() as a huge size_t */
+    if((i4_blk_wd <= 0) || (i4_blk_ht <= 0))
+    {
+        return;
+    }
+
+    if((i4_blk_wd == 4) && (i4_blk_ht == 4))
+    {
+        const WORD32 i4_val = _mm_cvtsi128_si32(val_16x8b);
+
+        for(i = 0; i < 4; i++)
+        {
+            /* memcpy keeps the 4 byte store valid for any alignment of pu1_dst */
+            /* and avoids writing UWORD8 storage through a WORD32 lvalue        */
+            memcpy(pu1_dst, &i4_val, sizeof(i4_val));
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else if((i4_blk_wd == 8) && (i4_blk_ht == 8))
+    {
+        for(i = 0; i < 8; i++)
+        {
+            _mm_storel_epi64((__m128i *) pu1_dst, val_16x8b);
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else if(i4_blk_wd % 16 == 0)
+    {
+        /* Rows are written one at a time, so the height needs no special */
+        /* handling here; only the width has to be a multiple of 16       */
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            for(j = 0; j < i4_blk_wd; j += 16)
+            {
+                _mm_storeu_si128((__m128i *) (pu1_dst + j), val_16x8b);
+            }
+
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else
+    {
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            memset(pu1_dst, u1_val, i4_blk_wd);
+
+            pu1_dst += i4_dst_stride;
+        }
+    }
+}
--- a/common/x86/svc/isvc_mem_fns_ssse3.c
+++ b/common/x86/svc/isvc_mem_fns_ssse3.c
@ -0,0 +1,435 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvc_mem_fns_ssse3.c
+ *
+ * @brief
+ *  SSSE3 variants of functions used for memory operations
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  - isvc_copy_2d_ssse3()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "ih264_typedefs.h"
+#include "isvc_mem_fns.h"
+
+#include <immintrin.h>
+
+/**
+********************************************************************************
+*  @brief  copies a 2d blk from one location to another
+*
+*  @par Description:
+*   Exactly i4_blk_wd x i4_blk_ht bytes are read and written. Rows are copied
+*   in 16, 8 or 4 byte chunks when the width is a multiple of 16, 8 or 4
+*   respectively; any other width is copied row by row with memmove().
+*   Nothing is copied when either dimension is zero or negative.
+*
+*  @param[out] pu1_dst : dst pointer
+*
+*  @param[in] i4_dst_stride: stride of destination, in bytes
+*
+*  @param[in] pu1_src : src ptr
+*
+*  @param[in] i4_src_stride: stride of src, in bytes
+*
+*  @param[in] i4_blk_wd : blk width, in bytes
+*
+*  @param[in] i4_blk_ht : blk height, in rows
+*
+*  @return void
+********************************************************************************
+*/
+void isvc_copy_2d_ssse3(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 *pu1_src,
+                        WORD32 i4_src_stride, WORD32 i4_blk_wd, WORD32 i4_blk_ht)
+{
+    WORD32 i, j;
+    /* all 128 bit registers are named with a suffix mxnb, where m is the */
+    /* number of n bits packed in the register                            */
+
+    /* A negative width would reach memmove() as a huge size_t */
+    if((i4_blk_wd <= 0) || (i4_blk_ht <= 0))
+    {
+        return;
+    }
+
+    if(0 == (i4_blk_wd & 15)) /* wd multiple of 16 case (includes multiples of 32) */
+    {
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            for(j = 0; j < i4_blk_wd; j += 16)
+            {
+                __m128i src_16x8b = _mm_loadu_si128((__m128i *) (pu1_src + j));
+
+                _mm_storeu_si128((__m128i *) (pu1_dst + j), src_16x8b);
+            }
+
+            pu1_src += i4_src_stride;
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else if(0 == (i4_blk_wd & 7)) /* wd multiple of 8 case */
+    {
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            for(j = 0; j < i4_blk_wd; j += 8)
+            {
+                __m128i src_16x8b = _mm_loadl_epi64((__m128i *) (pu1_src + j));
+
+                _mm_storel_epi64((__m128i *) (pu1_dst + j), src_16x8b);
+            }
+
+            pu1_src += i4_src_stride;
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else if(0 == (i4_blk_wd & 3)) /* wd multiple of 4 case */
+    {
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            for(j = 0; j < i4_blk_wd; j += 4)
+            {
+                WORD32 i4_pix;
+
+                /* Move exactly 4 bytes: an 8 byte load here would read past */
+                /* the last column of the block. memcpy also keeps the       */
+                /* access valid for any alignment of the two pointers        */
+                memcpy(&i4_pix, pu1_src + j, sizeof(i4_pix));
+                memcpy(pu1_dst + j, &i4_pix, sizeof(i4_pix));
+            }
+
+            pu1_src += i4_src_stride;
+            pu1_dst += i4_dst_stride;
+        }
+    }
+    else /* any other wd; copy only the bytes that were asked for */
+    {
+        for(i = 0; i < i4_blk_ht; i++)
+        {
+            memmove(pu1_dst, pu1_src, i4_blk_wd);
+
+            pu1_src += i4_src_stride;
+            pu1_dst += i4_dst_stride;
+        }
+    }
+}
--- a/common/x86/svc/isvc_padding_ssse3.c
+++ b/common/x86/svc/isvc_padding_ssse3.c
@ -0,0 +1,294 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvc_padding_ssse3.c
+*
+* @brief
+*  Contains function definitions for Padding
+*
+* @author
+*  Srinivas T
+*
+* @par List of Functions:
+*   - isvc_pad_left_luma_ssse3()
+*   - isvc_pad_left_chroma_ssse3()
+*   - isvc_pad_right_luma_ssse3()
+*   - isvc_pad_right_chroma_ssse3()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include <string.h>
+#include <assert.h>
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "isvc_mem_fns.h"
+#include "ih264_debug.h"
+
+#include <immintrin.h>
+
+/**
+*******************************************************************************
+*
+* @brief
+*   Padding (luma block) at the left of a 2d array
+*
+* @par Description:
+*   For every row, the byte at the start of the row is replicated into the
+*   pad_size bytes that immediately precede it
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first byte of the first row; the padding is written
+*  to the pad_size bytes just before this address
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+*  integer padding size in bytes; asserted to be a multiple of 8 because the
+*  padding is written with 8 byte stores
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+void isvc_pad_left_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
+{
+    UWORD8 *pu1_row = pu1_src;
+    WORD32 i4_rows_left;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
+    {
+        /* Left-most byte of this row, broadcast to every lane */
+        const __m128i edge_16x8b = _mm_set1_epi8(*pu1_row);
+        UWORD8 *pu1_pad = pu1_row - pad_size;
+        WORD32 i4_offset = 0;
+
+        while(i4_offset < pad_size)
+        {
+            _mm_storel_epi64((__m128i *) (pu1_pad + i4_offset), edge_16x8b);
+            i4_offset += 8;
+        }
+
+        pu1_row += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   Padding (chroma block) at the left of a 2d array
+*
+* @par Description:
+*   For every row, the two bytes at the start of the row are replicated as a
+*   unit into the pad_size bytes that immediately precede them
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first byte of the first row; the padding is written
+*  to the pad_size bytes just before this address
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+*  integer padding size in bytes; asserted to be a multiple of 8 because the
+*  padding is written with 8 byte stores
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+void isvc_pad_left_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
+{
+    WORD32 row;
+    WORD32 col;
+    UWORD8 *pu1_dst;
+
+    ASSERT(pad_size % 8 == 0);
+    for(row = 0; row < ht; row++)
+    {
+        __m128i src_temp0_16x8b;
+        UWORD16 u2_edge_pair;
+
+        /* Fetch the 2 byte edge unit with memcpy: reading it through a */
+        /* UWORD16 pointer is invalid when pu1_src is at an odd address */
+        /* and accesses UWORD8 storage through an incompatible type     */
+        memcpy(&u2_edge_pair, pu1_src, sizeof(u2_edge_pair));
+
+        pu1_dst = pu1_src - pad_size;
+        src_temp0_16x8b = _mm_set1_epi16(u2_edge_pair);
+        for(col = 0; col < pad_size; col += 8)
+        {
+            _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
+        }
+        pu1_src += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Padding (luma block) at the right of a 2d array
+*
+* @par Description:
+* For every row, the byte just before pu1_src is replicated into the pad_size
+* bytes that start at pu1_src
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first padding byte of the first row; the byte at
+*  pu1_src[-1] is the one that gets replicated
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+*  integer padding size in bytes; asserted to be a multiple of 8 because the
+*  padding is written with 8 byte stores
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+void isvc_pad_right_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
+{
+    UWORD8 *pu1_pad = pu1_src;
+    WORD32 i4_rows_left;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
+    {
+        /* Right-most byte of this row, broadcast to every lane */
+        const __m128i edge_16x8b = _mm_set1_epi8(pu1_pad[-1]);
+        WORD32 i4_offset = 0;
+
+        while(i4_offset < pad_size)
+        {
+            _mm_storel_epi64((__m128i *) (pu1_pad + i4_offset), edge_16x8b);
+            i4_offset += 8;
+        }
+
+        pu1_pad += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Padding (chroma block) at the right of a 2d array
+*
+* @par Description:
+* For every row, the two bytes just before pu1_src are replicated as a unit
+* into the pad_size bytes that start at pu1_src
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first padding byte of the first row; the two bytes
+*  at pu1_src[-2] and pu1_src[-1] are the ones that get replicated
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+*  integer padding size in bytes; asserted to be a multiple of 8 because the
+*  padding is written with 8 byte stores
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+void isvc_pad_right_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, WORD32 pad_size)
+{
+    WORD32 row;
+    WORD32 col;
+    UWORD8 *pu1_dst;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(row = 0; row < ht; row++)
+    {
+        __m128i src_temp0_16x8b;
+        UWORD16 u2_edge_pair;
+
+        /* Fetch the 2 byte edge unit with memcpy: reading it through a */
+        /* UWORD16 pointer is invalid when pu1_src is at an odd address */
+        /* and accesses UWORD8 storage through an incompatible type     */
+        memcpy(&u2_edge_pair, pu1_src - 2, sizeof(u2_edge_pair));
+
+        pu1_dst = pu1_src;
+        src_temp0_16x8b = _mm_set1_epi16(u2_edge_pair);
+        for(col = 0; col < pad_size; col += 8)
+        {
+            _mm_storel_epi64((__m128i *) (pu1_dst + col), src_temp0_16x8b);
+        }
+
+        pu1_src += src_strd;
+    }
+}
--- a/common/x86/svc/isvc_resi_trans_quant_sse42.c
+++ b/common/x86/svc/isvc_resi_trans_quant_sse42.c
--- a/encoder/arm/svc/isvce_downscaler_neon.c
+++ b/encoder/arm/svc/isvce_downscaler_neon.c
@ -0,0 +1,927 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file isvce_downscaler_neon.c
+*
+* @brief
+*  This file contains the ARM NEON SIMD version of the function which does
+*  horizontal scaling and transpose
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - isvce_horizontal_downscale_and_transpose_neon()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "isvc_defs.h"
+#include "isvce_defs.h"
+#include "isvc_structs.h"
+#include "isvce_downscaler_private_defs.h"
+
+void isvce_horizontal_downscale_and_transpose_neon(
+    downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
+    FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
+{
+    WORD32 i, j;
+    UWORD8 u1_phase;
+    UWORD8 *pu1_src_j, *pu1_dst_j;
+    UWORD8 *pu1_in_pixel;
+    UWORD8 *pu1_out_pixel;
+    WORD8 *pi1_filter_grid;
+    UWORD16 u2_full_pixel_inc;
+    UWORD32 u4_num_iterations_vertical_by_16, u4_num_iterations_vertical_by_8;
+    UWORD32 u4_rem_vert_loop_by_8, u4_rem_vert_loop_by_4;
+    UWORD32 u4_rem_vert_loop;
+    UWORD32 u4_height_finished;
+
+    uint8x8_t reg_8x8_src_r0, reg_8x8_src_r1, reg_8x8_src_r2, reg_8x8_src_r3, reg_8x8_src_r4,
+        reg_8x8_src_r5, reg_8x8_src_r6, reg_8x8_src_r7;
+
+    uint16x8_t reg_16x8_src_r0, reg_16x8_src_r1, reg_16x8_src_r2, reg_16x8_src_r3, reg_16x8_src_r4,
+        reg_16x8_src_r5, reg_16x8_src_r6, reg_16x8_src_r7;
+
+    int16x8_t reg_16x8_mul_r0, reg_16x8_mul_r1, reg_16x8_mul_r2, reg_16x8_mul_r3, reg_16x8_mul_r4,
+        reg_16x8_mul_r5, reg_16x8_mul_r6, reg_16x8_mul_r7;
+
+    int32x4_t reg_32x4_sum_r0, reg_32x4_sum_r1, reg_32x4_sum_r2, reg_32x4_sum_r3, reg_32x4_sum_r4,
+        reg_32x4_sum_r5, reg_32x4_sum_r6, reg_32x4_sum_r7;
+
+    int32x4_t reg_32x4_sum_r01, reg_32x4_sum_r23, reg_32x4_sum_r45, reg_32x4_sum_r67,
+        reg_32x4_sum_r89, reg_32x4_sum_r1011, reg_32x4_sum_r1213, reg_32x4_sum_r1415;
+
+    uint8x8_t reg_8x8_src_r8, reg_8x8_src_r9, reg_8x8_src_r10, reg_8x8_src_r11, reg_8x8_src_r12,
+        reg_8x8_src_r13, reg_8x8_src_r14, reg_8x8_src_r15;
+
+    uint16x8_t reg_16x8_src_r8, reg_16x8_src_r9, reg_16x8_src_r10, reg_16x8_src_r11,
+        reg_16x8_src_r12, reg_16x8_src_r13, reg_16x8_src_r14, reg_16x8_src_r15;
+
+    int16x8_t reg_16x8_mul_r8, reg_16x8_mul_r9, reg_16x8_mul_r10, reg_16x8_mul_r11,
+        reg_16x8_mul_r12, reg_16x8_mul_r13, reg_16x8_mul_r14, reg_16x8_mul_r15;
+
+    int32x4_t reg_32x4_sum_r8, reg_32x4_sum_r9, reg_32x4_sum_r10, reg_32x4_sum_r11,
+        reg_32x4_sum_r12, reg_32x4_sum_r13, reg_32x4_sum_r14, reg_32x4_sum_r15;
+
+    uint8x16_t reg_8x16_src_r0, reg_8x16_src_r1, reg_8x16_src_r2, reg_8x16_src_r3, reg_8x16_src_r4,
+        reg_8x16_src_r5, reg_8x16_src_r6, reg_8x16_src_r7;
+
+    uint16x8_t reg_16x8_src_cb_r0, reg_16x8_src_cb_r1, reg_16x8_src_cb_r2, reg_16x8_src_cb_r3,
+        reg_16x8_src_cb_r4, reg_16x8_src_cb_r5, reg_16x8_src_cb_r6, reg_16x8_src_cb_r7;
+
+    uint16x8_t reg_16x8_src_cr_r0, reg_16x8_src_cr_r1, reg_16x8_src_cr_r2, reg_16x8_src_cr_r3,
+        reg_16x8_src_cr_r4, reg_16x8_src_cr_r5, reg_16x8_src_cr_r6, reg_16x8_src_cr_r7;
+
+    int16x8_t reg_16x8_mul_cb_r0, reg_16x8_mul_cb_r1, reg_16x8_mul_cb_r2, reg_16x8_mul_cb_r3,
+        reg_16x8_mul_cb_r4, reg_16x8_mul_cb_r5, reg_16x8_mul_cb_r6, reg_16x8_mul_cb_r7;
+
+    int16x8_t reg_16x8_mul_cr_r0, reg_16x8_mul_cr_r1, reg_16x8_mul_cr_r2, reg_16x8_mul_cr_r3,
+        reg_16x8_mul_cr_r4, reg_16x8_mul_cr_r5, reg_16x8_mul_cr_r6, reg_16x8_mul_cr_r7;
+
+    int32x4_t reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1, reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3,
+        reg_32x4_sum_cb_r4, reg_32x4_sum_cb_r5, reg_32x4_sum_cb_r6, reg_32x4_sum_cb_r7;
+
+    int32x4_t reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1, reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3,
+        reg_32x4_sum_cr_r4, reg_32x4_sum_cr_r5, reg_32x4_sum_cr_r6, reg_32x4_sum_cr_r7;
+
+    int32x4_t reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23, reg_32x4_sum_cb_r45, reg_32x4_sum_cb_r67;
+    uint16x4_t reg_16x4_sum_cb_r01_23, reg_16x4_sum_cb_r45_67;
+    uint16x8_t reg_16x8_sum_cb_r0_r7;
+    uint8x8_t reg_8x8_sum_cb_r0_r7;
+
+    int32x4_t reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23, reg_32x4_sum_cr_r45, reg_32x4_sum_cr_r67;
+    uint16x4_t reg_16x4_sum_cr_r01_23, reg_16x4_sum_cr_r45_67;
+    uint16x8_t reg_16x8_sum_cr_r0_r7;
+    uint8x8_t reg_8x8_sum_cr_r0_r7;
+    uint16x8_t reg_16x8_sum_cb_cr_r0_r3;
+    uint8x8_t reg_8x8_sum_cb_cr_r0_r3;
+
+    int32x4_t reg_32x4_sum_cb_cr_r0;
+    uint16x4_t reg_16x4_sum_cb_cr_r0;
+
+    int32x4_t reg_32x4_zero = vdupq_n_s32(0);
+
+    uint16x4_t reg_16x4_sum_r01_23, reg_16x4_sum_r45_67;
+    uint16x4_t reg_16x4_sum_r8_r11, reg_16x4_sum_r12_r15;
+    uint16x8_t reg_16x8_sum_r0_r7, reg_16x8_sum_r8_r15;
+    uint8x8_t reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15;
+    uint8x16_t reg_8x16_sum_r0_r15;
+    int8x8_t reg_8x8_filt_coeff_grid;
+    int16x8_t reg_16x8_filt_coeff_grid;
+    int32x4x2_t reg_32x4x2_sum_r01, reg_32x4x2_sum_r23, reg_32x4x2_sum_r45, reg_32x4x2_sum_r67;
+    int32x4x2_t reg_32x4x2_sum_r89, reg_32x4x2_sum_r1011, reg_32x4x2_sum_r1213,
+        reg_32x4x2_sum_r1415;
+    uint8x16x2_t reg_8x16x2_src_r0, reg_8x16x2_src_r1, reg_8x16x2_src_r2, reg_8x16x2_src_r3;
+
+    downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
+
+    UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
+    UWORD32 u4_src_vert_increments = ps_scaler_state->u4_vert_increment;
+    UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
+    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
+    UWORD32 u4_in_stride = ps_src->i4_data_stride;
+    UWORD8 *pu1_dst = (UWORD8 *) ps_dst->pv_data;
+    UWORD32 u4_out_stride = ps_dst->i4_data_stride;
+    UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;
+
+    /* Offset the input so that the input pixel to be processed
+    co-incides with the centre of filter (4th coefficient)*/
+    pu1_src += (1 + u1_is_chroma);
+
+    ASSERT((1 << DOWNSCALER_Q) == u4_src_vert_increments);
+
+    if(!u1_is_chroma)
+    {
+        u4_num_iterations_vertical_by_16 = u4_blk_ht >> 4;
+        u4_rem_vert_loop = u4_blk_ht % 16;
+
+        for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_16; j++)
+        {
+            pu1_src_j = pu1_src + ((j << 4) * u4_in_stride);
+            pu1_dst_j = pu1_dst + (j << 4);
+
+            u4_center_pixel_pos = u4_center_pixel_pos_src;
+
+            for(i = 0; i < (WORD32) u4_blk_wd; i++)
+            {
+                u1_phase = get_filter_phase(u4_center_pixel_pos);
+
+                pi1_filter_grid = pai1_filters[u1_phase];
+
+                /* Doing the Calculation for current Loop Count  */
+                u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
+
+                pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
+
+                pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
+
+                reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
+
+                /******************************************************/
+                /* This loop is going vertically in bottom direction */
+                /* but the output pixels are stored in horizontal    */
+                /* direction in transpose manner                     */
+                /******************************************************/
+
+                /* r0-r7 */
+                reg_8x8_src_r0 = vld1_u8(pu1_in_pixel);
+                reg_8x8_src_r1 = vld1_u8(pu1_in_pixel + u4_in_stride);
+                reg_8x8_src_r2 = vld1_u8(pu1_in_pixel + 2 * u4_in_stride);
+                reg_8x8_src_r3 = vld1_u8(pu1_in_pixel + 3 * u4_in_stride);
+                reg_8x8_src_r4 = vld1_u8(pu1_in_pixel + 4 * u4_in_stride);
+                reg_8x8_src_r5 = vld1_u8(pu1_in_pixel + 5 * u4_in_stride);
+                reg_8x8_src_r6 = vld1_u8(pu1_in_pixel + 6 * u4_in_stride);
+                reg_8x8_src_r7 = vld1_u8(pu1_in_pixel + 7 * u4_in_stride);
+
+                /* r0-r7 */
+                reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0);
+                reg_16x8_src_r1 = vmovl_u8(reg_8x8_src_r1);
+                reg_16x8_src_r2 = vmovl_u8(reg_8x8_src_r2);
+                reg_16x8_src_r3 = vmovl_u8(reg_8x8_src_r3);
+                reg_16x8_src_r4 = vmovl_u8(reg_8x8_src_r4);
+                reg_16x8_src_r5 = vmovl_u8(reg_8x8_src_r5);
+                reg_16x8_src_r6 = vmovl_u8(reg_8x8_src_r6);
+                reg_16x8_src_r7 = vmovl_u8(reg_8x8_src_r7);
+
+                /* r8-r15 */
+                reg_8x8_src_r8 = vld1_u8(pu1_in_pixel + 8 * u4_in_stride);
+                reg_8x8_src_r9 = vld1_u8(pu1_in_pixel + 9 * u4_in_stride);
+                reg_8x8_src_r10 = vld1_u8(pu1_in_pixel + 10 * u4_in_stride);
+                reg_8x8_src_r11 = vld1_u8(pu1_in_pixel + 11 * u4_in_stride);
+                reg_8x8_src_r12 = vld1_u8(pu1_in_pixel + 12 * u4_in_stride);
+                reg_8x8_src_r13 = vld1_u8(pu1_in_pixel + 13 * u4_in_stride);
+                reg_8x8_src_r14 = vld1_u8(pu1_in_pixel + 14 * u4_in_stride);
+                reg_8x8_src_r15 = vld1_u8(pu1_in_pixel + 15 * u4_in_stride);
+
+                reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
+
+                /*r0-r7 */
+                reg_16x8_mul_r0 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r1 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r1), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r2 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r2), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r3 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r3), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r4 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r4), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r5 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r5), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r6 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r6), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r7 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r7), reg_16x8_filt_coeff_grid);
+
+                /* r8-r15 */
+                reg_16x8_src_r8 = vmovl_u8(reg_8x8_src_r8);
+                reg_16x8_src_r9 = vmovl_u8(reg_8x8_src_r9);
+                reg_16x8_src_r10 = vmovl_u8(reg_8x8_src_r10);
+                reg_16x8_src_r11 = vmovl_u8(reg_8x8_src_r11);
+                reg_16x8_src_r12 = vmovl_u8(reg_8x8_src_r12);
+                reg_16x8_src_r13 = vmovl_u8(reg_8x8_src_r13);
+                reg_16x8_src_r14 = vmovl_u8(reg_8x8_src_r14);
+                reg_16x8_src_r15 = vmovl_u8(reg_8x8_src_r15);
+
+                /* r0-r7 */
+                reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0);
+                reg_32x4_sum_r1 = vpaddlq_s16(reg_16x8_mul_r1);
+                reg_32x4_sum_r2 = vpaddlq_s16(reg_16x8_mul_r2);
+                reg_32x4_sum_r3 = vpaddlq_s16(reg_16x8_mul_r3);
+                reg_32x4_sum_r4 = vpaddlq_s16(reg_16x8_mul_r4);
+                reg_32x4_sum_r5 = vpaddlq_s16(reg_16x8_mul_r5);
+                reg_32x4_sum_r6 = vpaddlq_s16(reg_16x8_mul_r6);
+                reg_32x4_sum_r7 = vpaddlq_s16(reg_16x8_mul_r7);
+
+                /* r8-r15 */
+                reg_16x8_mul_r8 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r8), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r9 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r9), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r10 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r10), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r11 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r11), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r12 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r12), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r13 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r13), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r14 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r14), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_r15 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r15), reg_16x8_filt_coeff_grid);
+
+                /* r0-r7 */
+                reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_sum_r1);
+                reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_r2, reg_32x4_sum_r3);
+                reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r4, reg_32x4_sum_r5);
+                reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_r6, reg_32x4_sum_r7);
+
+                reg_32x4_sum_r01 = vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                reg_32x4_sum_r23 = vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
+                reg_32x4_sum_r45 = vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+                reg_32x4_sum_r67 = vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
+
+                /* r8-r15 */
+                reg_32x4_sum_r8 = vpaddlq_s16(reg_16x8_mul_r8);
+                reg_32x4_sum_r9 = vpaddlq_s16(reg_16x8_mul_r9);
+                reg_32x4_sum_r10 = vpaddlq_s16(reg_16x8_mul_r10);
+                reg_32x4_sum_r11 = vpaddlq_s16(reg_16x8_mul_r11);
+                reg_32x4_sum_r12 = vpaddlq_s16(reg_16x8_mul_r12);
+                reg_32x4_sum_r13 = vpaddlq_s16(reg_16x8_mul_r13);
+                reg_32x4_sum_r14 = vpaddlq_s16(reg_16x8_mul_r14);
+                reg_32x4_sum_r15 = vpaddlq_s16(reg_16x8_mul_r15);
+
+                /* r0-r7 */
+                reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_sum_r23);
+                reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r45, reg_32x4_sum_r67);
+                reg_32x4_sum_r01 = vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                reg_32x4_sum_r45 = vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+
+                /* r8-r15 */
+                reg_32x4x2_sum_r89 = vuzpq_s32(reg_32x4_sum_r8, reg_32x4_sum_r9);
+                reg_32x4x2_sum_r1011 = vuzpq_s32(reg_32x4_sum_r10, reg_32x4_sum_r11);
+                reg_32x4x2_sum_r1213 = vuzpq_s32(reg_32x4_sum_r12, reg_32x4_sum_r13);
+                reg_32x4x2_sum_r1415 = vuzpq_s32(reg_32x4_sum_r14, reg_32x4_sum_r15);
+
+                reg_32x4_sum_r89 = vaddq_s32(reg_32x4x2_sum_r89.val[0], reg_32x4x2_sum_r89.val[1]);
+                reg_32x4_sum_r1011 =
+                    vaddq_s32(reg_32x4x2_sum_r1011.val[0], reg_32x4x2_sum_r1011.val[1]);
+                reg_32x4_sum_r1213 =
+                    vaddq_s32(reg_32x4x2_sum_r1213.val[0], reg_32x4x2_sum_r1213.val[1]);
+                reg_32x4_sum_r1415 =
+                    vaddq_s32(reg_32x4x2_sum_r1415.val[0], reg_32x4x2_sum_r1415.val[1]);
+
+                /* r0-r7 */
+                reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7);
+                reg_16x4_sum_r45_67 = vqrshrun_n_s32(reg_32x4_sum_r45, 7);
+
+                /* r8-r15 */
+                reg_32x4x2_sum_r89 = vuzpq_s32(reg_32x4_sum_r89, reg_32x4_sum_r1011);
+                reg_32x4x2_sum_r1213 = vuzpq_s32(reg_32x4_sum_r1213, reg_32x4_sum_r1415);
+                reg_32x4_sum_r89 = vaddq_s32(reg_32x4x2_sum_r89.val[0], reg_32x4x2_sum_r89.val[1]);
+                reg_32x4_sum_r1213 =
+                    vaddq_s32(reg_32x4x2_sum_r1213.val[0], reg_32x4x2_sum_r1213.val[1]);
+
+                /* r0-r7 */
+                reg_16x8_sum_r0_r7 = vcombine_u16(reg_16x4_sum_r01_23, reg_16x4_sum_r45_67);
+                reg_8x8_sum_r0_r7 = vqmovn_u16(reg_16x8_sum_r0_r7);
+
+                reg_16x4_sum_r8_r11 = vqrshrun_n_s32(reg_32x4_sum_r89, 7);
+                reg_16x4_sum_r12_r15 = vqrshrun_n_s32(reg_32x4_sum_r1213, 7);
+
+                reg_16x8_sum_r8_r15 = vcombine_u16(reg_16x4_sum_r8_r11, reg_16x4_sum_r12_r15);
+                reg_8x8_sum_r8_r15 = vqmovn_u16(reg_16x8_sum_r8_r15);
+
+                reg_8x16_sum_r0_r15 = vcombine_u8(reg_8x8_sum_r0_r7, reg_8x8_sum_r8_r15);
+
+                /* r0-r7 */
+                vst1q_u8(pu1_out_pixel, reg_8x16_sum_r0_r15);
+
+                pu1_out_pixel += 16;
+                pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 4)) >> DOWNSCALER_Q;
+
+                /* Update the context for next Loop Count */
+                u4_center_pixel_pos += u4_src_horz_increments;
+            }
+        }
+
+        /* Loop for the remaining height less than 16 */
+        if(u4_rem_vert_loop)
+        {
+            u4_rem_vert_loop_by_8 = u4_rem_vert_loop >> 3;
+            u4_rem_vert_loop = u4_rem_vert_loop % 8;
+
+            u4_height_finished = (u4_num_iterations_vertical_by_16 << 4);
+
+            pu1_src_j = pu1_src + ((u4_height_finished) *u4_in_stride);
+            pu1_dst_j = pu1_dst + u4_height_finished;
+
+            u4_center_pixel_pos = u4_center_pixel_pos_src;
+
+            /* 8 <= remaining height < 16 */
+            if(u4_rem_vert_loop_by_8)
+            {
+                for(i = 0; i < (WORD32) u4_blk_wd; i++)
+                {
+                    u1_phase = get_filter_phase(u4_center_pixel_pos);
+                    pi1_filter_grid = pai1_filters[u1_phase];
+
+                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
+
+                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
+
+                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
+
+                    reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
+
+                    for(j = u4_rem_vert_loop_by_8; j > 0; j--)
+                    {
+                        /******************************************************/
+                        /* This loop is going vertically in bottom direction */
+                        /* but the output pixels are stored in horizontal    */
+                        /* direction in transpose manner                     */
+                        /******************************************************/
+
+                        reg_8x8_src_r0 = vld1_u8(pu1_in_pixel);
+                        reg_8x8_src_r1 = vld1_u8(pu1_in_pixel + u4_in_stride);
+                        reg_8x8_src_r2 = vld1_u8(pu1_in_pixel + 2 * u4_in_stride);
+                        reg_8x8_src_r3 = vld1_u8(pu1_in_pixel + 3 * u4_in_stride);
+                        reg_8x8_src_r4 = vld1_u8(pu1_in_pixel + 4 * u4_in_stride);
+                        reg_8x8_src_r5 = vld1_u8(pu1_in_pixel + 5 * u4_in_stride);
+                        reg_8x8_src_r6 = vld1_u8(pu1_in_pixel + 6 * u4_in_stride);
+                        reg_8x8_src_r7 = vld1_u8(pu1_in_pixel + 7 * u4_in_stride);
+
+                        reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0);
+                        reg_16x8_src_r1 = vmovl_u8(reg_8x8_src_r1);
+                        reg_16x8_src_r2 = vmovl_u8(reg_8x8_src_r2);
+                        reg_16x8_src_r3 = vmovl_u8(reg_8x8_src_r3);
+                        reg_16x8_src_r4 = vmovl_u8(reg_8x8_src_r4);
+                        reg_16x8_src_r5 = vmovl_u8(reg_8x8_src_r5);
+                        reg_16x8_src_r6 = vmovl_u8(reg_8x8_src_r6);
+                        reg_16x8_src_r7 = vmovl_u8(reg_8x8_src_r7);
+                        reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
+
+                        reg_16x8_mul_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r1),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r2),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r3),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r4 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r4),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r5 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r5),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r6 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r6),
+                                                    reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_r7 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r7),
+                                                    reg_16x8_filt_coeff_grid);
+
+                        reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0);
+                        reg_32x4_sum_r1 = vpaddlq_s16(reg_16x8_mul_r1);
+                        reg_32x4_sum_r2 = vpaddlq_s16(reg_16x8_mul_r2);
+                        reg_32x4_sum_r3 = vpaddlq_s16(reg_16x8_mul_r3);
+                        reg_32x4_sum_r4 = vpaddlq_s16(reg_16x8_mul_r4);
+                        reg_32x4_sum_r5 = vpaddlq_s16(reg_16x8_mul_r5);
+                        reg_32x4_sum_r6 = vpaddlq_s16(reg_16x8_mul_r6);
+                        reg_32x4_sum_r7 = vpaddlq_s16(reg_16x8_mul_r7);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_sum_r1);
+                        reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_r2, reg_32x4_sum_r3);
+                        reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r4, reg_32x4_sum_r5);
+                        reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_r6, reg_32x4_sum_r7);
+
+                        reg_32x4_sum_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                        reg_32x4_sum_r23 =
+                            vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
+                        reg_32x4_sum_r45 =
+                            vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+                        reg_32x4_sum_r67 =
+                            vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_sum_r23);
+                        reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_r45, reg_32x4_sum_r67);
+                        reg_32x4_sum_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                        reg_32x4_sum_r45 =
+                            vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+
+                        reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7);
+                        reg_16x4_sum_r45_67 = vqrshrun_n_s32(reg_32x4_sum_r45, 7);
+
+                        reg_16x8_sum_r0_r7 = vcombine_u16(reg_16x4_sum_r01_23, reg_16x4_sum_r45_67);
+                        reg_8x8_sum_r0_r7 = vqmovn_u16(reg_16x8_sum_r0_r7);
+
+                        vst1_u8(pu1_out_pixel, reg_8x8_sum_r0_r7);
+
+                        pu1_out_pixel += 8;
+                        pu1_in_pixel +=
+                            (u4_src_vert_increments * (u4_in_stride << 3)) >> DOWNSCALER_Q;
+                    }
+                    /* Update the context for next Loop Count */
+                    u4_center_pixel_pos += u4_src_horz_increments;
+                }
+            }
+
+            /* 1 <= remaining height < 8 */
+            if(u4_rem_vert_loop)
+            {
+                u4_height_finished =
+                    ((u4_num_iterations_vertical_by_16 << 4) + (u4_rem_vert_loop_by_8 << 3));
+                pu1_src_j = pu1_src + u4_height_finished * u4_in_stride;
+                pu1_dst_j = pu1_dst + u4_height_finished;
+
+                u4_center_pixel_pos = u4_center_pixel_pos_src;
+
+                for(i = 0; i < (WORD32) u4_blk_wd; i++)
+                {
+                    u1_phase = get_filter_phase(u4_center_pixel_pos);
+                    pi1_filter_grid = pai1_filters[u1_phase];
+
+                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
+
+                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
+
+                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
+
+                    reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
+
+                    for(j = u4_rem_vert_loop; j > 0; j--)
+                    {
+                        /******************************************************/
+                        /* This loop is going vertically in bottom direction */
+                        /* but the output pixels are stored in horizontal    */
+                        /* direction in transpose manner                     */
+                        /******************************************************/
+
+                        reg_8x8_src_r0 = vld1_u8(pu1_in_pixel);
+                        reg_16x8_src_r0 = vmovl_u8(reg_8x8_src_r0);
+
+                        reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
+
+                        reg_16x8_mul_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_r0),
+                                                    reg_16x8_filt_coeff_grid);
+
+                        reg_32x4_sum_r0 = vpaddlq_s16(reg_16x8_mul_r0);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r0, reg_32x4_zero);
+                        reg_32x4_sum_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_r01, reg_32x4_zero);
+                        reg_32x4_sum_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+
+                        reg_16x4_sum_r01_23 = vqrshrun_n_s32(reg_32x4_sum_r01, 7);
+
+                        vst1_lane_u8(pu1_out_pixel, vreinterpret_u8_u16(reg_16x4_sum_r01_23), 0);
+                        pu1_out_pixel += 1;
+                        pu1_in_pixel += (u4_src_vert_increments * u4_in_stride) >> DOWNSCALER_Q;
+                    }
+                    /* Update the context for next Loop Count */
+                    u4_center_pixel_pos += u4_src_horz_increments;
+                }
+            }
+        }
+    }
+    /* for chroma */
+    else
+    {
+        u4_num_iterations_vertical_by_8 = u4_blk_ht >> 3;
+        u4_rem_vert_loop = u4_blk_ht % 8;
+
+        for(j = 0; j < (WORD32) u4_num_iterations_vertical_by_8; j++)
+        {
+            pu1_src_j = pu1_src + ((j << 3) * u4_in_stride);
+            pu1_dst_j = pu1_dst + (j << 3);
+
+            u4_center_pixel_pos = u4_center_pixel_pos_src;
+
+            for(i = 0; i < (WORD32) u4_blk_wd; i++)
+            {
+                u1_phase = get_filter_phase(u4_center_pixel_pos);
+                pi1_filter_grid = pai1_filters[u1_phase];
+
+                /*Doing the Calculation for current Loop Count  */
+                u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
+
+                pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
+
+                pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
+
+                reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
+
+                /******************************************************/
+                /* This loop is going vertically in bottom direction */
+                /* but the output pixels are stored in horizontal    */
+                /* direction in transpose manner                     */
+                /******************************************************/
+
+                reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel);
+                reg_8x16_src_r1 = vld1q_u8(pu1_in_pixel + u4_in_stride);
+                reg_8x16_src_r2 = vld1q_u8(pu1_in_pixel + 2 * u4_in_stride);
+                reg_8x16_src_r3 = vld1q_u8(pu1_in_pixel + 3 * u4_in_stride);
+                reg_8x16_src_r4 = vld1q_u8(pu1_in_pixel + 4 * u4_in_stride);
+                reg_8x16_src_r5 = vld1q_u8(pu1_in_pixel + 5 * u4_in_stride);
+                reg_8x16_src_r6 = vld1q_u8(pu1_in_pixel + 6 * u4_in_stride);
+                reg_8x16_src_r7 = vld1q_u8(pu1_in_pixel + 7 * u4_in_stride);
+
+                reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r1);
+                reg_8x16x2_src_r1 = vuzpq_u8(reg_8x16_src_r2, reg_8x16_src_r3);
+                reg_8x16x2_src_r2 = vuzpq_u8(reg_8x16_src_r4, reg_8x16_src_r5);
+                reg_8x16x2_src_r3 = vuzpq_u8(reg_8x16_src_r6, reg_8x16_src_r7);
+
+                reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0]));
+                reg_16x8_src_cb_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[0]));
+                reg_16x8_src_cb_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[0]));
+                reg_16x8_src_cb_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[0]));
+                reg_16x8_src_cb_r4 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r2.val[0]));
+                reg_16x8_src_cb_r5 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r2.val[0]));
+                reg_16x8_src_cb_r6 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r3.val[0]));
+                reg_16x8_src_cb_r7 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r3.val[0]));
+
+                reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1]));
+                reg_16x8_src_cr_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[1]));
+                reg_16x8_src_cr_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[1]));
+                reg_16x8_src_cr_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[1]));
+                reg_16x8_src_cr_r4 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r2.val[1]));
+                reg_16x8_src_cr_r5 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r2.val[1]));
+                reg_16x8_src_cr_r6 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r3.val[1]));
+                reg_16x8_src_cr_r7 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r3.val[1]));
+
+                reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
+
+                reg_16x8_mul_cb_r0 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r1 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r1), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r2 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r2), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r3 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r3), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r4 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r4), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r5 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r5), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r6 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r6), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cb_r7 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r7), reg_16x8_filt_coeff_grid);
+
+                reg_16x8_mul_cr_r0 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r1 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r1), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r2 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r2), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r3 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r3), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r4 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r4), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r5 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r5), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r6 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r6), reg_16x8_filt_coeff_grid);
+                reg_16x8_mul_cr_r7 =
+                    vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r7), reg_16x8_filt_coeff_grid);
+
+                reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0);
+                reg_32x4_sum_cb_r1 = vpaddlq_s16(reg_16x8_mul_cb_r1);
+                reg_32x4_sum_cb_r2 = vpaddlq_s16(reg_16x8_mul_cb_r2);
+                reg_32x4_sum_cb_r3 = vpaddlq_s16(reg_16x8_mul_cb_r3);
+                reg_32x4_sum_cb_r4 = vpaddlq_s16(reg_16x8_mul_cb_r4);
+                reg_32x4_sum_cb_r5 = vpaddlq_s16(reg_16x8_mul_cb_r5);
+                reg_32x4_sum_cb_r6 = vpaddlq_s16(reg_16x8_mul_cb_r6);
+                reg_32x4_sum_cb_r7 = vpaddlq_s16(reg_16x8_mul_cb_r7);
+
+                reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0);
+                reg_32x4_sum_cr_r1 = vpaddlq_s16(reg_16x8_mul_cr_r1);
+                reg_32x4_sum_cr_r2 = vpaddlq_s16(reg_16x8_mul_cr_r2);
+                reg_32x4_sum_cr_r3 = vpaddlq_s16(reg_16x8_mul_cr_r3);
+                reg_32x4_sum_cr_r4 = vpaddlq_s16(reg_16x8_mul_cr_r4);
+                reg_32x4_sum_cr_r5 = vpaddlq_s16(reg_16x8_mul_cr_r5);
+                reg_32x4_sum_cr_r6 = vpaddlq_s16(reg_16x8_mul_cr_r6);
+                reg_32x4_sum_cr_r7 = vpaddlq_s16(reg_16x8_mul_cr_r7);
+
+                reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1);
+                reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3);
+                reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cb_r4, reg_32x4_sum_cb_r5);
+                reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_cb_r6, reg_32x4_sum_cb_r7);
+
+                reg_32x4_sum_cb_r01 =
+                    vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                reg_32x4_sum_cb_r23 =
+                    vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
+                reg_32x4_sum_cb_r45 =
+                    vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+                reg_32x4_sum_cb_r67 =
+                    vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
+
+                reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23);
+                reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cb_r45, reg_32x4_sum_cb_r67);
+                reg_32x4_sum_cb_r01 =
+                    vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                reg_32x4_sum_cb_r45 =
+                    vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+
+                reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1);
+                reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3);
+                reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cr_r4, reg_32x4_sum_cr_r5);
+                reg_32x4x2_sum_r67 = vuzpq_s32(reg_32x4_sum_cr_r6, reg_32x4_sum_cr_r7);
+
+                reg_32x4_sum_cr_r01 =
+                    vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                reg_32x4_sum_cr_r23 =
+                    vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
+                reg_32x4_sum_cr_r45 =
+                    vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+                reg_32x4_sum_cr_r67 =
+                    vaddq_s32(reg_32x4x2_sum_r67.val[0], reg_32x4x2_sum_r67.val[1]);
+
+                reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23);
+                reg_32x4x2_sum_r45 = vuzpq_s32(reg_32x4_sum_cr_r45, reg_32x4_sum_cr_r67);
+                reg_32x4_sum_cr_r01 =
+                    vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                reg_32x4_sum_cr_r45 =
+                    vaddq_s32(reg_32x4x2_sum_r45.val[0], reg_32x4x2_sum_r45.val[1]);
+
+                reg_16x4_sum_cb_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cb_r01, 7);
+                reg_16x4_sum_cb_r45_67 = vqrshrun_n_s32(reg_32x4_sum_cb_r45, 7);
+
+                reg_16x4_sum_cr_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cr_r01, 7);
+                reg_16x4_sum_cr_r45_67 = vqrshrun_n_s32(reg_32x4_sum_cr_r45, 7);
+
+                reg_16x8_sum_cb_r0_r7 =
+                    vcombine_u16(reg_16x4_sum_cb_r01_23, reg_16x4_sum_cb_r45_67);
+                reg_16x8_sum_cr_r0_r7 =
+                    vcombine_u16(reg_16x4_sum_cr_r01_23, reg_16x4_sum_cr_r45_67);
+
+                reg_8x8_sum_cb_r0_r7 = vqmovn_u16(reg_16x8_sum_cb_r0_r7);
+                reg_8x8_sum_cr_r0_r7 = vqmovn_u16(reg_16x8_sum_cr_r0_r7);
+
+                vst1_u8(pu1_out_pixel, reg_8x8_sum_cb_r0_r7);
+                vst1_u8(pu1_out_pixel + u4_out_stride, reg_8x8_sum_cr_r0_r7);
+
+                pu1_out_pixel += 8;
+
+                pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride << 3)) >> DOWNSCALER_Q;
+
+                /* Update the context for next Loop Count */
+                u4_center_pixel_pos += u4_src_horz_increments;
+            }
+        }
+
+        /* Loop for the remaining height less than 8 */
+        if(u4_rem_vert_loop)
+        {
+            u4_rem_vert_loop_by_4 = u4_rem_vert_loop >> 2;
+            u4_rem_vert_loop = u4_rem_vert_loop % 4;
+            u4_height_finished = (u4_num_iterations_vertical_by_8 << 3);
+            pu1_src_j = pu1_src + ((u4_height_finished) *u4_in_stride);
+            pu1_dst_j = pu1_dst + u4_height_finished;
+
+            u4_center_pixel_pos = u4_center_pixel_pos_src;
+
+            /* 4<= remaining height < 8 */
+            if(u4_rem_vert_loop_by_4)
+            {
+                for(i = 0; i < (WORD32) u4_blk_wd; i++)
+                {
+                    u1_phase = get_filter_phase(u4_center_pixel_pos);
+                    pi1_filter_grid = pai1_filters[u1_phase];
+
+                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
+
+                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
+
+                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
+
+                    reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
+
+                    for(j = u4_rem_vert_loop_by_4; j > 0; j--)
+                    {
+                        /******************************************************/
+                        /* This loop is going vertically in bottom direction */
+                        /* but the output pixels are stored in horizontal    */
+                        /* direction in transpose manner                     */
+                        /******************************************************/
+
+                        reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel);
+                        reg_8x16_src_r1 = vld1q_u8(pu1_in_pixel + u4_in_stride);
+                        reg_8x16_src_r2 = vld1q_u8(pu1_in_pixel + 2 * u4_in_stride);
+                        reg_8x16_src_r3 = vld1q_u8(pu1_in_pixel + 3 * u4_in_stride);
+
+                        reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r1);
+                        reg_8x16x2_src_r1 = vuzpq_u8(reg_8x16_src_r2, reg_8x16_src_r3);
+
+                        reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0]));
+                        reg_16x8_src_cb_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[0]));
+                        reg_16x8_src_cb_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[0]));
+                        reg_16x8_src_cb_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[0]));
+
+                        reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1]));
+                        reg_16x8_src_cr_r1 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r0.val[1]));
+                        reg_16x8_src_cr_r2 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r1.val[1]));
+                        reg_16x8_src_cr_r3 = vmovl_u8(vget_high_u8(reg_8x16x2_src_r1.val[1]));
+
+                        reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
+
+                        reg_16x8_mul_cb_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cb_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r1),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cb_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r2),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cb_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r3),
+                                                       reg_16x8_filt_coeff_grid);
+
+                        reg_16x8_mul_cr_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cr_r1 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r1),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cr_r2 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r2),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cr_r3 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r3),
+                                                       reg_16x8_filt_coeff_grid);
+
+                        reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0);
+                        reg_32x4_sum_cb_r1 = vpaddlq_s16(reg_16x8_mul_cb_r1);
+                        reg_32x4_sum_cb_r2 = vpaddlq_s16(reg_16x8_mul_cb_r2);
+                        reg_32x4_sum_cb_r3 = vpaddlq_s16(reg_16x8_mul_cb_r3);
+
+                        reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0);
+                        reg_32x4_sum_cr_r1 = vpaddlq_s16(reg_16x8_mul_cr_r1);
+                        reg_32x4_sum_cr_r2 = vpaddlq_s16(reg_16x8_mul_cr_r2);
+                        reg_32x4_sum_cr_r3 = vpaddlq_s16(reg_16x8_mul_cr_r3);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cb_r1);
+                        reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cb_r2, reg_32x4_sum_cb_r3);
+                        reg_32x4_sum_cb_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                        reg_32x4_sum_cb_r23 =
+                            vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r01, reg_32x4_sum_cb_r23);
+                        reg_32x4_sum_cb_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r0, reg_32x4_sum_cr_r1);
+                        reg_32x4x2_sum_r23 = vuzpq_s32(reg_32x4_sum_cr_r2, reg_32x4_sum_cr_r3);
+                        reg_32x4_sum_cr_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+                        reg_32x4_sum_cr_r23 =
+                            vaddq_s32(reg_32x4x2_sum_r23.val[0], reg_32x4x2_sum_r23.val[1]);
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cr_r01, reg_32x4_sum_cr_r23);
+                        reg_32x4_sum_cr_r01 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+
+                        reg_16x4_sum_cb_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cb_r01, 7);
+                        reg_16x4_sum_cr_r01_23 = vqrshrun_n_s32(reg_32x4_sum_cr_r01, 7);
+
+                        reg_16x8_sum_cb_cr_r0_r3 =
+                            vcombine_u16(reg_16x4_sum_cb_r01_23, reg_16x4_sum_cr_r01_23);
+                        reg_8x8_sum_cb_cr_r0_r3 = vmovn_u16(reg_16x8_sum_cb_cr_r0_r3);
+                        vst1_lane_u32((uint32_t *) (pu1_out_pixel),
+                                      vreinterpret_u32_u8(reg_8x8_sum_cb_cr_r0_r3), 0);
+                        vst1_lane_u32((uint32_t *) (pu1_out_pixel + u4_out_stride),
+                                      vreinterpret_u32_u8(reg_8x8_sum_cb_cr_r0_r3), 1);
+
+                        pu1_out_pixel += 4;
+
+                        pu1_in_pixel +=
+                            (u4_src_vert_increments * (u4_in_stride << 2)) >> DOWNSCALER_Q;
+                    }
+                    /* Update the context for next Loop Count */
+                    u4_center_pixel_pos += u4_src_horz_increments;
+                }
+            }
+
+            /* 1<= remaining height < 4 */
+            if(u4_rem_vert_loop)
+            {
+                u4_height_finished =
+                    ((u4_num_iterations_vertical_by_8 << 3) + (u4_rem_vert_loop_by_4 << 2));
+                pu1_src_j = pu1_src + u4_height_finished * u4_in_stride;
+                pu1_dst_j = pu1_dst + u4_height_finished;
+
+                u4_center_pixel_pos = u4_center_pixel_pos_src;
+                for(i = 0; i < (WORD32) u4_blk_wd; i++)
+                {
+                    u1_phase = get_filter_phase(u4_center_pixel_pos);
+                    pi1_filter_grid = pai1_filters[u1_phase];
+
+                    u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
+
+                    pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
+
+                    pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
+
+                    reg_8x8_filt_coeff_grid = vld1_s8(pi1_filter_grid);
+
+                    for(j = u4_rem_vert_loop; j > 0; j--)
+                    {
+                        /******************************************************/
+                        /* This loop is going vertically in bottom direction */
+                        /* but the output pixels are stored in horizontal    */
+                        /* direction in transpose manner                     */
+                        /******************************************************/
+
+                        reg_8x16_src_r0 = vld1q_u8(pu1_in_pixel);
+
+                        reg_8x16x2_src_r0 = vuzpq_u8(reg_8x16_src_r0, reg_8x16_src_r0);
+                        reg_16x8_src_cb_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[0]));
+                        reg_16x8_src_cr_r0 = vmovl_u8(vget_low_u8(reg_8x16x2_src_r0.val[1]));
+
+                        reg_16x8_filt_coeff_grid = vmovl_s8(reg_8x8_filt_coeff_grid);
+
+                        reg_16x8_mul_cb_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cb_r0),
+                                                       reg_16x8_filt_coeff_grid);
+                        reg_16x8_mul_cr_r0 = vmulq_s16(vreinterpretq_s16_u16(reg_16x8_src_cr_r0),
+                                                       reg_16x8_filt_coeff_grid);
+
+                        reg_32x4_sum_cb_r0 = vpaddlq_s16(reg_16x8_mul_cb_r0);
+                        reg_32x4_sum_cr_r0 = vpaddlq_s16(reg_16x8_mul_cr_r0);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_r0, reg_32x4_sum_cr_r0);
+                        reg_32x4_sum_cb_cr_r0 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+
+                        reg_32x4x2_sum_r01 = vuzpq_s32(reg_32x4_sum_cb_cr_r0, reg_32x4_zero);
+                        reg_32x4_sum_cb_cr_r0 =
+                            vaddq_s32(reg_32x4x2_sum_r01.val[0], reg_32x4x2_sum_r01.val[1]);
+
+                        reg_16x4_sum_cb_cr_r0 = vqrshrun_n_s32(reg_32x4_sum_cb_cr_r0, 7);
+                        vst1_lane_u8((pu1_out_pixel), vreinterpret_u8_u16(reg_16x4_sum_cb_cr_r0),
+                                     0);
+                        vst1_lane_u8((pu1_out_pixel + u4_out_stride),
+                                     vreinterpret_u8_u16(reg_16x4_sum_cb_cr_r0), 2);
+
+                        pu1_out_pixel += 1;
+
+                        pu1_in_pixel += (u4_src_vert_increments * (u4_in_stride)) >> DOWNSCALER_Q;
+                    }
+
+                    /* Update the context for next Loop Count */
+                    u4_center_pixel_pos += u4_src_horz_increments;
+                }
+            }
+        }
+    }
+}
--- a/encoder/arm/svc/isvce_function_selector.c
+++ b/encoder/arm/svc/isvce_function_selector.c
@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in h264
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - isvce_init_function_ptr
+*  - isvce_default_arch
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "isvc_defs.h"
+#include "ih264_size_defs.h"
+#include "isvce_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "isvc_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_cabac_tables.h"
+#include "isvc_macros.h"
+#include "ih264_platform_macros.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "isvce_cabac.h"
+#include "ih264e_platform_macros.h"
+#include "isvce_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: The generic function pointers are installed first through
+* isvce_init_function_ptr_generic(). Depending on the build configuration and
+* on the architecture requested in s_cfg.e_arch they are then overridden:
+*  - ARMV8 defined          : isvce_init_function_ptr_neon_av8()
+*  - DISABLE_NEON undefined : isvce_init_function_ptr_neon_a9q()
+*  - otherwise              : the generic pointers are always retained
+* In every configuration ARCH_ARM_NONEON and ARCH_X86_GENERIC retain the
+* generic pointers, so a caller can opt out of the NEON functions at run time.
+*
+* @param[in] pv_codec
+*  Codec context pointer; it is cast to isvce_codec_t * inside the function
+*
+* @returns  none
+*
+* @remarks pv_codec is dereferenced without a NULL check
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr(void *pv_codec)
+{
+    isvce_codec_t *ps_codec = (isvce_codec_t *) pv_codec;
+
+    /* Baseline: generic pointers are always installed before any override */
+    isvce_init_function_ptr_generic(ps_codec);
+
+    switch(ps_codec->s_cfg.e_arch)
+    {
+#if defined(ARMV8)
+        case ARCH_ARM_A53:
+        case ARCH_ARM_A57:
+        case ARCH_ARM_V8_NEON:
+        default:
+            /* Any arch value not listed in the generic group below lands here */
+            isvce_init_function_ptr_neon_av8(ps_codec);
+            break;
+#elif !defined(DISABLE_NEON)
+        case ARCH_ARM_A9Q:
+        case ARCH_ARM_A9A:
+        case ARCH_ARM_A9:
+        case ARCH_ARM_A7:
+        case ARCH_ARM_A5:
+        case ARCH_ARM_A15:
+        default:
+            /* Any arch value not listed in the generic group below lands here */
+            isvce_init_function_ptr_neon_a9q(ps_codec);
+            break;
+#else
+        /* Built with DISABLE_NEON: every arch value keeps the generic pointers */
+        default:
+#endif
+        /* Explicit non-NEON requests keep the generic pointers installed above. */
+        /* ARCH_ARM_NONEON must be listed here, otherwise it would fall into the */
+        /* 'default' label of the NEON groups and silently select NEON functions */
+        case ARCH_ARM_NONEON:
+        case ARCH_X86_GENERIC:
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Report the default architecture for this build of the encoder
+*
+* @par Description: The value is chosen purely at compile time from the
+* ARMV8 and DISABLE_NEON macros; nothing is probed at run time.
+*  - ARMV8 defined          : ARCH_ARM_V8_NEON
+*  - DISABLE_NEON undefined : ARCH_ARM_A9Q
+*  - otherwise              : ARCH_GENERIC
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture selected by the build configuration
+*
+* @remarks NOTE(review): isvce_init_function_ptr() in this file spells its
+* generic case ARCH_X86_GENERIC; confirm that ARCH_GENERIC is declared in
+* IV_ARCH_T, since this branch is only compiled when DISABLE_NEON is defined
+*
+*******************************************************************************
+*/
+IV_ARCH_T isvce_default_arch(void)
+{
+    IV_ARCH_T e_build_arch;
+
+#ifdef ARMV8
+    e_build_arch = ARCH_ARM_V8_NEON;
+#elif !defined(DISABLE_NEON)
+    e_build_arch = ARCH_ARM_A9Q;
+#else
+    e_build_arch = ARCH_GENERIC;
+#endif
+
+    return e_build_arch;
+}
--- a/encoder/arm/svc/isvce_function_selector_a9q.c
+++ b/encoder/arm/svc/isvce_function_selector_a9q.c
@ -0,0 +1,270 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_function_selector_a9q.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - isvce_init_function_ptr_neon_a9q
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "isvc_defs.h"
+#include "ih264_size_defs.h"
+#include "isvce_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_cabac_tables.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_platform_macros.h"
+#include "isvce_cabac.h"
+#include "isvce_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "isvce_cavlc.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec)
+{
+    WORD32 i = 0;
+
+    /* curr proc ctxt */
+    isvce_process_ctxt_t *ps_proc = NULL;
+    isvce_me_ctxt_t *ps_me_ctxt = NULL;
+    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
+    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
+    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
+    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+    /* Init forward transform fn ptr */
+    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
+    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
+    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
+    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] =
+        isvc_resi_trans_quant_4x4_with_residual_sub_neon;
+    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
+    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
+        isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
+
+    /* Init inverse transform fn ptr */
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
+        isvc_iquant_itrans_recon_4x4_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
+        isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
+        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
+        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_neon;
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
+
+    ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
+    ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
+
+    /* Init fn ptr luma core coding */
+    ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
+    ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
+    ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
+
+    /* Init fn ptr chroma core coding */
+    ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
+    ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
+
+    /* write mb syntax layer */
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
+
+    /* Padding Functions */
+    ps_codec->pf_pad_top = ih264_pad_top_a9q;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
+
+    /* Inter pred leaf level functions */
+    ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
+    ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
+    ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
+    ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
+    ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
+
+    /* sad me level functions */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+
+    /* memor handling operations */
+    ps_mem_fxns->pf_mem_cpy = ih264_memcpy_a9q;
+    ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
+    ps_mem_fxns->pf_mem_set = ih264_memset_a9q;
+    ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
+
+    /* sad me level functions */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        ps_proc = &ps_codec->as_process[i];
+        ps_me_ctxt = &ps_proc->s_me_ctxt;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
+    }
+
+    /* intra mode eval -encoder level function */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
+}
--- a/encoder/arm/svc/isvce_function_selector_av8.c
+++ b/encoder/arm/svc/isvce_function_selector_av8.c
@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_function_selector_av8.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - isvce_init_function_ptr_neon_av8
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "isvc_defs.h"
+#include "ih264_size_defs.h"
+#include "isvce_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_cabac_tables.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_platform_macros.h"
+#include "isvce_cabac.h"
+#include "isvce_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "isvce_cavlc.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Bind the ARMv8 (av8) routines to the intra/inter/transform/deblk
+* function pointers of the codec context
+*
+* @par Description: fills the function pointer slots of the codec context with
+* the _av8 / _neon suffixed routines, or non-suffixed ones where none is used
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks slots that are not written here keep the value they had on entry
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec)
+{
+    /* ISA dependent function tables embedded in the codec context */
+    isa_dependent_fxns_t *ps_isa_fxns = &ps_codec->s_isa_dependent_fxns;
+    enc_loop_fxns_t *ps_loop_fxns = &ps_isa_fxns->s_enc_loop_fxns;
+    inter_pred_fxns_t *ps_ip_fxns = &ps_isa_fxns->s_inter_pred_fxns;
+    mem_fxns_t *ps_mem_ops = &ps_isa_fxns->s_mem_fxns;
+
+    /* ME context of the process context currently being set up */
+    isvce_me_ctxt_t *ps_me = NULL;
+    WORD32 i4_ctxt = 0;
+
+    /* Luma intra 16x16 prediction routines
+     * (vert, horz, dc, plane) */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+    /* Luma intra 4x4 prediction routines,
+     * slots 0 to 8 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+    /* Luma intra 8x8 prediction routines;
+     * slot 1 is not assigned in this function */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+    /* Chroma intra 8x8 prediction routines
+     * (dc, horz, vert, plane) */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+    /* Forward transform and quantisation (8x8 uses the non-suffixed routine) */
+    ps_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
+    ps_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
+    ps_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4_neon;
+    ps_loop_fxns->apf_resi_trans_quant_4x4[1] =
+        isvc_resi_trans_quant_4x4_with_residual_sub_neon;
+    ps_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4_neon;
+    ps_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] =
+        isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
+
+    /* Inverse quantisation, inverse transform and reconstruction */
+    ps_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
+    ps_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
+    ps_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
+
+    ps_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
+        isvc_iquant_itrans_recon_4x4_with_res_output_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
+        isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
+
+    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
+        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
+        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
+
+    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_neon;
+
+    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
+    ps_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
+
+    ps_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
+    ps_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
+
+    /* Luma core coding entry points (slot 2 is not assigned here) */
+    ps_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
+    ps_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
+    ps_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
+
+    /* Chroma core coding entry points */
+    ps_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
+    ps_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
+
+    /* Luma deblocking edge filters */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
+
+    /* Chroma deblocking edge filters */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
+
+    /* MB syntax writers per entropy mode and slice type; [CABAC][BSLICE] is not set here */
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
+
+    /* Padding functions (pad_bottom uses the non-suffixed routine) */
+    ps_codec->pf_pad_top = ih264_pad_top_av8;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
+
+    /* Inter prediction routines (bilinear uses the non-suffixed routine) */
+    ps_ip_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
+    ps_ip_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
+    ps_ip_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
+    ps_ip_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_ip_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
+
+    /* Codec level SAD functions */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+
+    /* The same SAD functions, plus the remaining ME routines, per process context */
+    for(i4_ctxt = 0; i4_ctxt < (MAX_PROCESS_CTXT); i4_ctxt++)
+    {
+        /* only the ME context of each process context is written */
+        ps_me = &ps_codec->as_process[i4_ctxt].s_me_ctxt;
+        ps_me->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+        ps_me->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+        ps_me->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+        ps_me->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
+        ps_me->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
+        ps_me->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
+        ps_me->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
+        ps_me->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
+    }
+
+    /* Memory copy / set operations */
+    ps_mem_ops->pf_mem_cpy = ih264_memcpy_av8;
+    ps_mem_ops->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
+    ps_mem_ops->pf_mem_set = ih264_memset_av8;
+    ps_mem_ops->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
+
+    /* Encoder level intra mode evaluation (4x4 uses the non-suffixed routine) */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* Format conversion (csc), non-suffixed routines */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation functions - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;
+}
--- a/encoder/arm/svc/isvce_platform_macros.h
+++ b/encoder/arm/svc/isvce_platform_macros.h
@ -0,0 +1,139 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_platform_macros.h
+*
+* @brief
+*  Contains platform specific routines used for codec context initialization
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_PLATFORM_MACROS_H_
+#define _ISVCE_PLATFORM_MACROS_H_
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (a9q / NEON variant, going by the name)
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context with the ARMv8 (_av8 / _neon suffixed) routines
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (generic variant, going by the name)
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use
+*
+* @param[in] pv_codec
+*  Codec context pointer, passed as void *
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the environment
+* in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T isvce_default_arch(void);
+
+#endif /* _ISVCE_PLATFORM_MACROS_H_ */
--- a/encoder/arm/svc/isvce_rc_utils_neon.c
+++ b/encoder/arm/svc/isvce_rc_utils_neon.c
@ -0,0 +1,625 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file isvce_rc_utils_neon.c
+*
+* @brief
+*  This file contains the NEON SIMD version of the function which computes
+*  the gradient per pixel value used in Init Qp
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - isvce_get_gpp_neon()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include <arm_neon.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "isvc_structs.h"
+#include "isvce_rc_utils_private_defs.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*   get gpp function
+*
+* @par Description:
+*   computes gradient per pixel value for a given frame
+*
+* @param[in] ps_input_buf
+*  pointer to yuv buffer properties
+*
+* @returns
+*  calculated gpp value
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+DOUBLE isvce_get_gpp_neon(yuv_buf_props_t *ps_input_buf)
+{
+    UWORD8 *pu1_input_buf;
+    UWORD32 i, j, k;
+    UWORD32 u4_width, u4_height, i4_input_stride;
+    DOUBLE d_gpp_y, d_gpp_u, d_gpp_v, d_gpp;
+
+    uint8x8_t reg_8x8_src_r0, reg_8x8_src_r1, reg_8x8_src_r2, reg_8x8_src_r3, reg_8x8_src_r4,
+        reg_8x8_src_r5, reg_8x8_src_r6, reg_8x8_src_r7, reg_8x8_src_r8;
+    uint8x8_t reg_8x8_src_right_r0, reg_8x8_src_right_r1, reg_8x8_src_right_r2,
+        reg_8x8_src_right_r3, reg_8x8_src_right_r4, reg_8x8_src_right_r5, reg_8x8_src_right_r6,
+        reg_8x8_src_right_r7;
+    uint16x8_t reg_16x8_abs_diff_y, reg_16x8_abs_diff_uv;
+    uint64x2_t reg_64x2_gpp_y, reg_64x2_gpp_uv;
+
+    uint8x8_t reg_8x8_shuffle = {0, 2, 4, 6, 1, 3, 5, 7};
+    uint16x8_t reg_16x8_and_mask_y = {0xffff, 0xffff, 0xffff, 0xffff,
+                                      0xffff, 0xffff, 0xffff, 0x0000};
+    uint16x8_t reg_16x8_and_mask_uv = {0xffff, 0xffff, 0xffff, 0x0000,
+                                       0xffff, 0xffff, 0xffff, 0x0000};
+    uint32x4_t reg_32x4_abs_diff_hadd_y = vdupq_n_u32(0);
+    uint32x4_t reg_32x4_abs_diff_hadd_uv = vdupq_n_u32(0);
+
+    d_gpp_y = 0;
+    d_gpp_u = 0;
+    d_gpp_v = 0;
+    d_gpp = 0;
+    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data;
+    i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride;
+    u4_width = ps_input_buf->u4_width;
+    u4_height = ps_input_buf->u4_height;
+
+    ASSERT((u4_width % 8) == 0);
+
+    /***********************************************************/
+    /* For Luma -                                              */
+    /* This code block calculates gpp value for luma by adding */
+    /* the absolute difference between the current pixel and   */
+    /* it's immediate right pixel with the absolute difference */
+    /* between the current pixel and it's immediate bottom     */
+    /* pixel and accumulating for every pixel in the frame.    */
+    /***********************************************************/
+    /* -8 in the checks below since right column and bottow row being used for gradients, */
+    /* and last row and column are ignored for gradient computation. */
+    /* Note that input is not required to be padded */
+    for(i = 0; i < u4_height - 8; i += 8)
+    {
+        for(j = 0; j < u4_width - 8; j += 8)
+        {
+            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+            reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
+            reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
+            reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
+            reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);
+
+            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1);
+            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 1);
+            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 1);
+            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 1);
+            reg_8x8_src_right_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j + 1);
+            reg_8x8_src_right_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j + 1);
+            reg_8x8_src_right_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j + 1);
+            reg_8x8_src_right_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j + 1);
+
+            reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_r5);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_r6);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_r7);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_r8);
+
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_right_r4);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_right_r5);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_right_r6);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_right_r7);
+
+            reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
+        }
+
+        /************************************************************/
+        /* Remaining width -                                        */
+        /* Since Last pixel is not getting processed, remaining 7   */
+        /* pixels are getting processed separately by performing    */
+        /* and operations with reg_16x8_and_mask_y                  */
+        /************************************************************/
+        ASSERT((u4_width - j) == 8);
+        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+        reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
+        reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
+        reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
+        reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);
+
+        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1);
+        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 1);
+        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 1);
+        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 1);
+        reg_8x8_src_right_r4 = vext_u8(reg_8x8_src_r4, reg_8x8_src_r4, 1);
+        reg_8x8_src_right_r5 = vext_u8(reg_8x8_src_r5, reg_8x8_src_r5, 1);
+        reg_8x8_src_right_r6 = vext_u8(reg_8x8_src_r6, reg_8x8_src_r6, 1);
+        reg_8x8_src_right_r7 = vext_u8(reg_8x8_src_r7, reg_8x8_src_r7, 1);
+
+        reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_r5);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_r6);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_r7);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_r8);
+
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r4, reg_8x8_src_right_r4);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r5, reg_8x8_src_right_r5);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r6, reg_8x8_src_right_r6);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r7, reg_8x8_src_right_r7);
+
+        reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y);
+
+        reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
+
+        pu1_input_buf += (i4_input_stride * 8);
+    }
+
+    /* Loop for remaining height less than 8 */
+    /*    4 <= remaining_height < 8          */
+    for(k = i; k < u4_height - 4; k += 4, i += 4)
+    {
+        for(j = 0; j < u4_width - 8; j += 8)
+        {
+            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1);
+            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 1);
+            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 1);
+            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 1);
+
+            reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
+            reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);
+
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);
+
+            reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
+        }
+
+        /************************************************************/
+        /* Remaining width -                                        */
+        /* Since Last pixel is not getting processed, remaining 7   */
+        /* pixels are getting processed separately by performing    */
+        /* and operations with reg_16x8_and_mask_y                  */
+        /************************************************************/
+        ASSERT((u4_width - j) == 8);
+        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+
+        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1);
+        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 1);
+        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 1);
+        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 1);
+
+        reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_r2);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_r3);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_r4);
+
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r1, reg_8x8_src_right_r1);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r2, reg_8x8_src_right_r2);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r3, reg_8x8_src_right_r3);
+
+        reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y);
+
+        reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
+
+        pu1_input_buf += (i4_input_stride * 4);
+    }
+
+    /* Loop for remaining height less than 4 */
+    /*    0 <= remaining_height < 4          */
+    for(k = i; k < u4_height - 1; k++)
+    {
+        for(j = 0; j < u4_width - 8; j += 8)
+        {
+            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 1);
+
+            reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+            reg_16x8_abs_diff_y =
+                vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
+
+            reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
+        }
+
+        /************************************************************/
+        /* Remaining width -                                        */
+        /* Since Last pixel is not getting processed, remaining 7   */
+        /* pixels are getting processed separately by performing    */
+        /* and operations with reg_16x8_and_mask_y                  */
+        /************************************************************/
+        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 1);
+
+        reg_16x8_abs_diff_y = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+        reg_16x8_abs_diff_y = vabal_u8(reg_16x8_abs_diff_y, reg_8x8_src_r0, reg_8x8_src_right_r0);
+
+        reg_16x8_abs_diff_y = vandq_u16(reg_16x8_abs_diff_y, reg_16x8_and_mask_y);
+
+        reg_32x4_abs_diff_hadd_y = vpadalq_u16(reg_32x4_abs_diff_hadd_y, reg_16x8_abs_diff_y);
+
+        pu1_input_buf += i4_input_stride;
+    }
+
+    /* Pairwise add reg_32x4_abs_diff_hadd_y to get final gpp value */
+    reg_64x2_gpp_y = vpaddlq_u32(reg_32x4_abs_diff_hadd_y);
+    d_gpp_y = vgetq_lane_u64(reg_64x2_gpp_y, 0);
+    d_gpp_y += vgetq_lane_u64(reg_64x2_gpp_y, 1);
+
+    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data;
+    i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride;
+
+    /***************************************************************/
+    /* For Chroma -                                                */
+    /* This code block first deinterleaves the Cb and Cr values,   */
+    /* calculates gpp value for both Cb and Cr separately by       */
+    /* adding the absolute difference between the current pixel    */
+    /* and its immediate right pixel with the absolute             */
+    /* difference between the current pixel and its immediate      */
+    /* bottom pixel and accumulating for every pixel in the frame. */
+    /***************************************************************/
+    for(i = 0; i < (u4_height >> 1) - 8; i += 8)
+    {
+        for(j = 0; j < u4_width - 8; j += 8)
+        {
+            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+            reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
+            reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
+            reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
+            reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);
+
+            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2);
+            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 2);
+            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 2);
+            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 2);
+            reg_8x8_src_right_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j + 2);
+            reg_8x8_src_right_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j + 2);
+            reg_8x8_src_right_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j + 2);
+            reg_8x8_src_right_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j + 2);
+
+            /* separating u and v */
+            reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
+            reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
+            reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
+            reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
+            reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
+            reg_8x8_src_r5 = vtbl1_u8(reg_8x8_src_r5, reg_8x8_shuffle);
+            reg_8x8_src_r6 = vtbl1_u8(reg_8x8_src_r6, reg_8x8_shuffle);
+            reg_8x8_src_r7 = vtbl1_u8(reg_8x8_src_r7, reg_8x8_shuffle);
+            reg_8x8_src_r8 = vtbl1_u8(reg_8x8_src_r8, reg_8x8_shuffle);
+            reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
+            reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
+            reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
+            reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);
+            reg_8x8_src_right_r4 = vtbl1_u8(reg_8x8_src_right_r4, reg_8x8_shuffle);
+            reg_8x8_src_right_r5 = vtbl1_u8(reg_8x8_src_right_r5, reg_8x8_shuffle);
+            reg_8x8_src_right_r6 = vtbl1_u8(reg_8x8_src_right_r6, reg_8x8_shuffle);
+            reg_8x8_src_right_r7 = vtbl1_u8(reg_8x8_src_right_r7, reg_8x8_shuffle);
+
+            reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_r5);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_r6);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_r7);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_r8);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_right_r4);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_right_r5);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_right_r6);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_right_r7);
+
+            reg_32x4_abs_diff_hadd_uv =
+                vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
+        }
+
+        /************************************************************/
+        /* Remaining width -                                        */
+        /* Since Last pixel is not getting processed, remaining 6   */
+        /* pixels are getting processed separately by performing    */
+        /* and operations with reg_16x8_and_mask_uv                 */
+        /************************************************************/
+        ASSERT((u4_width - j) == 8);
+        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+        reg_8x8_src_r5 = vld1_u8(pu1_input_buf + (i4_input_stride * 5) + j);
+        reg_8x8_src_r6 = vld1_u8(pu1_input_buf + (i4_input_stride * 6) + j);
+        reg_8x8_src_r7 = vld1_u8(pu1_input_buf + (i4_input_stride * 7) + j);
+        reg_8x8_src_r8 = vld1_u8(pu1_input_buf + (i4_input_stride * 8) + j);
+        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2);
+        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 2);
+        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 2);
+        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 2);
+        reg_8x8_src_right_r4 = vext_u8(reg_8x8_src_r4, reg_8x8_src_r4, 2);
+        reg_8x8_src_right_r5 = vext_u8(reg_8x8_src_r5, reg_8x8_src_r5, 2);
+        reg_8x8_src_right_r6 = vext_u8(reg_8x8_src_r6, reg_8x8_src_r6, 2);
+        reg_8x8_src_right_r7 = vext_u8(reg_8x8_src_r7, reg_8x8_src_r7, 2);
+
+        /* separating u and v */
+        reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
+        reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
+        reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
+        reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
+        reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
+        reg_8x8_src_r5 = vtbl1_u8(reg_8x8_src_r5, reg_8x8_shuffle);
+        reg_8x8_src_r6 = vtbl1_u8(reg_8x8_src_r6, reg_8x8_shuffle);
+        reg_8x8_src_r7 = vtbl1_u8(reg_8x8_src_r7, reg_8x8_shuffle);
+        reg_8x8_src_r8 = vtbl1_u8(reg_8x8_src_r8, reg_8x8_shuffle);
+        reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
+        reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
+        reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
+        reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);
+        reg_8x8_src_right_r4 = vtbl1_u8(reg_8x8_src_right_r4, reg_8x8_shuffle);
+        reg_8x8_src_right_r5 = vtbl1_u8(reg_8x8_src_right_r5, reg_8x8_shuffle);
+        reg_8x8_src_right_r6 = vtbl1_u8(reg_8x8_src_right_r6, reg_8x8_shuffle);
+        reg_8x8_src_right_r7 = vtbl1_u8(reg_8x8_src_right_r7, reg_8x8_shuffle);
+
+        reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_r5);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_r6);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_r7);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_r8);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r4, reg_8x8_src_right_r4);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r5, reg_8x8_src_right_r5);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r6, reg_8x8_src_right_r6);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r7, reg_8x8_src_right_r7);
+
+        reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv);
+
+        reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
+
+        pu1_input_buf += (i4_input_stride * 8);
+    }
+
+    /* Loop for remaining height less than 8 */
+    /*    4 <= remaining_height < 8          */
+    for(k = i; k < (u4_height >> 1) - 4; k += 4, i += 4)
+    {
+        for(j = 0; j < u4_width - 8; j += 8)
+        {
+            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+            reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+            reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+            reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2);
+            reg_8x8_src_right_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j + 2);
+            reg_8x8_src_right_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j + 2);
+            reg_8x8_src_right_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j + 2);
+
+            /* separating u and v */
+            reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
+            reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
+            reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
+            reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
+            reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
+            reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
+            reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
+            reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
+            reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);
+
+            reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
+            reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);
+
+            reg_32x4_abs_diff_hadd_uv =
+                vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
+        }
+
+        /************************************************************/
+        /* Remaining width -                                        */
+        /* Since Last pixel is not getting processed, remaining 6   */
+        /* pixels are getting processed separately by performing    */
+        /* and operations with reg_16x8_and_mask_uv                 */
+        /************************************************************/
+        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+        reg_8x8_src_r2 = vld1_u8(pu1_input_buf + (i4_input_stride * 2) + j);
+        reg_8x8_src_r3 = vld1_u8(pu1_input_buf + (i4_input_stride * 3) + j);
+        reg_8x8_src_r4 = vld1_u8(pu1_input_buf + (i4_input_stride * 4) + j);
+        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2);
+        reg_8x8_src_right_r1 = vext_u8(reg_8x8_src_r1, reg_8x8_src_r1, 2);
+        reg_8x8_src_right_r2 = vext_u8(reg_8x8_src_r2, reg_8x8_src_r2, 2);
+        reg_8x8_src_right_r3 = vext_u8(reg_8x8_src_r3, reg_8x8_src_r3, 2);
+
+        /* separating u and v */
+        reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
+        reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
+        reg_8x8_src_r2 = vtbl1_u8(reg_8x8_src_r2, reg_8x8_shuffle);
+        reg_8x8_src_r3 = vtbl1_u8(reg_8x8_src_r3, reg_8x8_shuffle);
+        reg_8x8_src_r4 = vtbl1_u8(reg_8x8_src_r4, reg_8x8_shuffle);
+        reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
+        reg_8x8_src_right_r1 = vtbl1_u8(reg_8x8_src_right_r1, reg_8x8_shuffle);
+        reg_8x8_src_right_r2 = vtbl1_u8(reg_8x8_src_right_r2, reg_8x8_shuffle);
+        reg_8x8_src_right_r3 = vtbl1_u8(reg_8x8_src_right_r3, reg_8x8_shuffle);
+
+        reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_r2);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_r3);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_r4);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r1, reg_8x8_src_right_r1);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r2, reg_8x8_src_right_r2);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r3, reg_8x8_src_right_r3);
+
+        reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv);
+
+        reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
+
+        pu1_input_buf += (i4_input_stride * 4);
+    }
+
+    /* Loop for remaining height less than 4 */
+    /*    0 <= remaining_height < 4          */
+    for(k = i; k < (u4_height >> 1) - 1; k++)
+    {
+        for(j = 0; j < u4_width - 8; j += 8)
+        {
+            reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+            reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+            reg_8x8_src_right_r0 = vld1_u8(pu1_input_buf + j + 2);
+
+            /* separating u and v */
+            reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
+            reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
+            reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
+
+            reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+            reg_16x8_abs_diff_uv =
+                vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
+
+            reg_32x4_abs_diff_hadd_uv =
+                vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
+        }
+
+        /************************************************************/
+        /* Remaining width -                                        */
+        /* Since Last pixel is not getting processed, remaining 6   */
+        /* pixels are getting processed separately by performing    */
+        /* and operations with reg_16x8_and_mask_uv                 */
+        /************************************************************/
+        reg_8x8_src_r0 = vld1_u8(pu1_input_buf + j);
+        reg_8x8_src_r1 = vld1_u8(pu1_input_buf + i4_input_stride + j);
+        reg_8x8_src_right_r0 = vext_u8(reg_8x8_src_r0, reg_8x8_src_r0, 2);
+
+        /* separating u and v */
+        reg_8x8_src_r0 = vtbl1_u8(reg_8x8_src_r0, reg_8x8_shuffle);
+        reg_8x8_src_r1 = vtbl1_u8(reg_8x8_src_r1, reg_8x8_shuffle);
+        reg_8x8_src_right_r0 = vtbl1_u8(reg_8x8_src_right_r0, reg_8x8_shuffle);
+
+        reg_16x8_abs_diff_uv = vabdl_u8(reg_8x8_src_r0, reg_8x8_src_r1);
+        reg_16x8_abs_diff_uv = vabal_u8(reg_16x8_abs_diff_uv, reg_8x8_src_r0, reg_8x8_src_right_r0);
+
+        reg_16x8_abs_diff_uv = vandq_u16(reg_16x8_abs_diff_uv, reg_16x8_and_mask_uv);
+
+        reg_32x4_abs_diff_hadd_uv = vpadalq_u16(reg_32x4_abs_diff_hadd_uv, reg_16x8_abs_diff_uv);
+
+        pu1_input_buf += i4_input_stride;
+    }
+
+    /* Pairwise add reg_32x4_abs_diff_hadd_uv to get final gpp_u and gpp_v values */
+    reg_64x2_gpp_uv = vpaddlq_u32(reg_32x4_abs_diff_hadd_uv);
+    d_gpp_u = vgetq_lane_u64(reg_64x2_gpp_uv, 0);
+    d_gpp_v = vgetq_lane_u64(reg_64x2_gpp_uv, 1);
+
+    d_gpp_y /= (u4_width * u4_height);
+    d_gpp_u /= ((u4_width / 2) * (u4_height / 2));
+    d_gpp_v /= ((u4_width / 2) * (u4_height / 2));
+
+    d_gpp = (DOUBLE) ((WT_LUMA_GPP * d_gpp_y) + d_gpp_u + d_gpp_v) / WT_TOTAL_GPP;
+
+    return d_gpp;
+}
--- a/encoder/arm/svc/isvce_residual_pred_neon.c
+++ b/encoder/arm/svc/isvce_residual_pred_neon.c
@ -0,0 +1,666 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+*
+* @file
+*  isvce_residual_pred_neon.c
+*
+* @brief
+*  Contains NEON functions used for SVC residual prediction
+*
+*******************************************************************************
+*/
+#include <arm_neon.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_size_defs.h"
+#include "isvc_macros.h"
+#include "isvc_structs.h"
+/**
+*******************************************************************************
+*
+* @brief
+*  Upsamples a block of 16-bit residual samples by 2 in both directions
+*
+* @par Description:
+*  Interior output positions are weighted sums (3:1 and 1:3) of two
+*  neighbouring inputs, applied first along rows and then along columns; the
+*  first and last output position in each direction use only the nearest
+*  input in that direction.
+*  Horizontal results are kept at 4x scale in ps_scratch and the vertical pass
+*  removes the scale with a rounding right shift (by 2 on the first and last
+*  output row, by 4 on the rows in between).
+*  When u1_ref_tx_size and u4_ref_nnz are both non-zero, 8 rows of 8 inputs
+*  are expanded to 16 rows of 16 outputs in one pass. Otherwise four 4x4
+*  inputs are visited and each is expanded to 8x8 only if its bit in
+*  u4_ref_nnz is set; the output of a block whose bit is clear is not written.
+*
+* @param[in] ps_ref_array_positions
+*  Unused
+*
+* @param[in] ps_ref_array_phases
+*  Unused
+*
+* @param[in] ps_inp
+*  Input; pv_data is read as WORD16 and i4_data_stride is applied in WORD16
+*  units. Reading starts one row and one column past pv_data
+*
+* @param[out] ps_out
+*  Output; pv_data is written as WORD16 with i4_data_stride in WORD16 units
+*
+* @param[in] ps_scratch
+*  Storage for the horizontal pass results; only pv_data is used
+*
+* @param[in] u4_ref_nnz
+*  Flags telling which input blocks are coded (see description)
+*
+* @param[in] u1_ref_tx_size
+*  Non-zero selects the single pass 8x8 path, provided u4_ref_nnz is non-zero
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+void isvce_luma_residual_sampler_2x_neon(coordinates_t *ps_ref_array_positions,
+                                         coordinates_t *ps_ref_array_phases,
+                                         buffer_container_t *ps_inp, buffer_container_t *ps_out,
+                                         buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz,
+                                         UWORD8 u1_ref_tx_size)
+{
+    WORD16 *pi2_inp_data = (WORD16 *) ps_inp->pv_data;
+    WORD16 *pi2_out_res = (WORD16 *) ps_out->pv_data;
+    WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
+    WORD32 i4_out_res_stride = ps_out->i4_data_stride;
+    WORD16 *pi2_refarray_buffer = (WORD16 *) ps_scratch->pv_data;
+    WORD32 i4_blk_ctr;
+
+    UNUSED(ps_ref_array_positions);
+    UNUSED(ps_ref_array_phases);
+
+    /* For 2x scaling, offsets always point to TL pixel outside MB */
+    /* Hence, refTransBlkIdc will be different and since phase */
+    /* for first refArray pos for horiz filtering samples > 8, */
+    /* first row and first column from the refArray is never used */
+    pi2_inp_data += 1 + i4_inp_data_stride;
+    /* Single pass 8x8 path: taken only when both u1_ref_tx_size and
+     * u4_ref_nnz are non-zero (presumably the 8x8 transform case of the
+     * reference layer - confirm with the caller) */
+    if((u1_ref_tx_size) && (0 != u4_ref_nnz))
+    {
+        WORD16 *pi2_ref_data_byte;
+        WORD32 *pi4_ref_array;
+        WORD32 i4_i, i4_j;
+
+        /* ----------- Horizontal Interpolation ---------------- */
+        int16x8_t i2_coeff_add_16x8_r0;
+        int16x8_t i2_coeff_16x8_r0_0, i2_coeff_16x8_r0_1;
+        int16x8_t i2_coeff_16x8_sl_r0_0, i2_coeff_16x8_sl_r0_1;
+        int16x8_t result_16x8_r0_0, result_16x8_r0_1;
+
+        int16x8_t i2_coeff_add_16x8_r1;
+        int16x8_t i2_coeff_16x8_r1_0, i2_coeff_16x8_r1_1;
+        int16x8_t i2_coeff_16x8_sl_r1_0, i2_coeff_16x8_sl_r1_1;
+        int16x8_t result_16x8_r1_0, result_16x8_r1_1;
+        int16x8x2_t final_result_16x8x2_r0, final_result_16x8x2_r1;
+
+        pi2_ref_data_byte = pi2_inp_data;
+
+        /* ----------- Horizontal Interpolation ----------------
+         * Two input rows are handled per iteration. Each row of 8 inputs
+         * becomes 16 WORD32 entries in the scratch buffer:
+         *   entry 0  = 4 * in[0], entry 15 = 4 * in[7]
+         *   entries in between = 3 * a + b followed by a + 3 * b for each
+         *   pair of neighbouring inputs a, b
+         * The loads at offset +1 read in[1] .. in[8], i.e. 9 inputs per row
+         */
+        pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
+
+        for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i += 2)
+        {
+            i2_coeff_16x8_r0_0 = vld1q_s16(pi2_ref_data_byte);
+            i2_coeff_16x8_r0_1 = vld1q_s16((pi2_ref_data_byte + 1));
+
+            i2_coeff_16x8_r1_0 = vld1q_s16(pi2_ref_data_byte + i4_inp_data_stride);
+            i2_coeff_16x8_r1_1 = vld1q_s16((pi2_ref_data_byte + i4_inp_data_stride + 1));
+
+            i2_coeff_add_16x8_r0 = vaddq_s16(i2_coeff_16x8_r0_0, i2_coeff_16x8_r0_1);
+            i2_coeff_16x8_sl_r0_0 = vshlq_n_s16(i2_coeff_16x8_r0_0, 1);
+            i2_coeff_16x8_sl_r0_1 = vshlq_n_s16(i2_coeff_16x8_r0_1, 1);
+
+            i2_coeff_add_16x8_r1 = vaddq_s16(i2_coeff_16x8_r1_0, i2_coeff_16x8_r1_1);
+            i2_coeff_16x8_sl_r1_0 = vshlq_n_s16(i2_coeff_16x8_r1_0, 1);
+            i2_coeff_16x8_sl_r1_1 = vshlq_n_s16(i2_coeff_16x8_r1_1, 1);
+
+            result_16x8_r0_0 = vaddq_s16(i2_coeff_16x8_sl_r0_0, i2_coeff_add_16x8_r0);
+            result_16x8_r0_1 = vaddq_s16(i2_coeff_16x8_sl_r0_1, i2_coeff_add_16x8_r0);
+
+            result_16x8_r1_0 = vaddq_s16(i2_coeff_16x8_sl_r1_0, i2_coeff_add_16x8_r1);
+            result_16x8_r1_1 = vaddq_s16(i2_coeff_16x8_sl_r1_1, i2_coeff_add_16x8_r1);
+
+            final_result_16x8x2_r0 = vzipq_s16(result_16x8_r0_0, result_16x8_r0_1);
+            final_result_16x8x2_r1 = vzipq_s16(result_16x8_r1_0, result_16x8_r1_1);
+            /* The four stores cover entries 1 .. 16 of the scratch row.
+             * Entry 15 is replaced just below; entry 16 is entry 0 of the
+             * next scratch row and is rewritten when that row is stored.
+             * NOTE(review): for the last input row entry 16 is WORD32 index
+             * 128 of the scratch buffer and is not rewritten - confirm the
+             * buffer holds more than 8 x 16 WORD32 entries */
+            vst1q_s32(pi4_ref_array + 1, vmovl_s16(vget_low_s16(final_result_16x8x2_r0.val[0])));
+            vst1q_s32(pi4_ref_array + 5, vmovl_s16(vget_high_s16(final_result_16x8x2_r0.val[0])));
+            vst1q_s32(pi4_ref_array + 9, vmovl_s16(vget_low_s16(final_result_16x8x2_r0.val[1])));
+            vst1q_s32(pi4_ref_array + 13, vmovl_s16(vget_high_s16(final_result_16x8x2_r0.val[1])));
+
+            pi4_ref_array[0] = pi2_ref_data_byte[0] << 2;
+            pi4_ref_array[15] = pi2_ref_data_byte[7] << 2;
+            pi4_ref_array += 16;
+            pi2_ref_data_byte += i4_inp_data_stride;
+
+            vst1q_s32(pi4_ref_array + 1, vmovl_s16(vget_low_s16(final_result_16x8x2_r1.val[0])));
+            vst1q_s32(pi4_ref_array + 5, vmovl_s16(vget_high_s16(final_result_16x8x2_r1.val[0])));
+            vst1q_s32(pi4_ref_array + 9, vmovl_s16(vget_low_s16(final_result_16x8x2_r1.val[1])));
+            vst1q_s32(pi4_ref_array + 13, vmovl_s16(vget_high_s16(final_result_16x8x2_r1.val[1])));
+
+            pi4_ref_array[0] = pi2_ref_data_byte[0] << 2;
+            pi4_ref_array[15] = pi2_ref_data_byte[7] << 2;
+            pi4_ref_array += 16;
+            /* vertical loop updates */
+            pi2_ref_data_byte = pi2_inp_data + ((i4_i + 2) * i4_inp_data_stride);
+        }
+
+        /* ----------- Vertical Interpolation ----------------
+         * With h1, h2 two consecutive scratch rows:
+         *   output row 0       = (first scratch row + 2) >> 2
+         *   output rows 1 .. 14 = (3 * h1 + h2 + 8) >> 4 followed by
+         *                         (h1 + 3 * h2 + 8) >> 4 for each row pair
+         *   output row 15      = (last scratch row + 2) >> 2
+         * Results are narrowed to 16 bits without saturation (vmovn_s32)
+         */
+        pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
+        {
+            WORD32 *pi4_ref_array_temp;
+            WORD16 *pi2_out;
+            int32x4_t i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r1_2, i4_horz_samp_32x4_r1_3,
+                i4_horz_samp_32x4_r1_4;
+            int32x4_t i4_horz_samp_32x4_r2_1, i4_horz_samp_32x4_r2_2, i4_horz_samp_32x4_r2_3,
+                i4_horz_samp_32x4_r2_4;
+
+            int32x4_t i4_horz_res_32x4_r1_1, i4_horz_res_32x4_r1_2, i4_horz_res_32x4_r1_3,
+                i4_horz_res_32x4_r1_4;
+            int32x4_t i4_horz_res_32x4_r2_1, i4_horz_res_32x4_r2_2, i4_horz_res_32x4_r2_3,
+                i4_horz_res_32x4_r2_4;
+            int32x4_t i4_horz_res_32x4_r3_1, i4_horz_res_32x4_r3_2, i4_horz_res_32x4_r3_3,
+                i4_horz_res_32x4_r3_4;
+            int32x4_t horz_add_32x4_r2_1, horz_add_32x4_r2_2, horz_add_32x4_r2_3,
+                horz_add_32x4_r2_4;
+
+            int16x8_t comb_horz_16x8_1, comb_horz_16x8_2, comb_horz_16x8_3, comb_horz_16x8_4;
+            pi4_ref_array_temp = pi4_ref_array;
+            pi2_out = pi2_out_res;
+
+            i4_horz_samp_32x4_r1_1 = vld1q_s32(pi4_ref_array_temp);
+            i4_horz_samp_32x4_r1_2 = vld1q_s32(pi4_ref_array_temp + 4);
+            i4_horz_samp_32x4_r1_3 = vld1q_s32(pi4_ref_array_temp + 8);
+            i4_horz_samp_32x4_r1_4 = vld1q_s32(pi4_ref_array_temp + 12);
+
+            /* populate the first inter sample */
+            i4_horz_res_32x4_r1_1 = vrshrq_n_s32(i4_horz_samp_32x4_r1_1, 2);
+            i4_horz_res_32x4_r1_2 = vrshrq_n_s32(i4_horz_samp_32x4_r1_2, 2);
+            i4_horz_res_32x4_r1_3 = vrshrq_n_s32(i4_horz_samp_32x4_r1_3, 2);
+            i4_horz_res_32x4_r1_4 = vrshrq_n_s32(i4_horz_samp_32x4_r1_4, 2);
+
+            comb_horz_16x8_1 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_1), vmovn_s32(i4_horz_res_32x4_r1_2));
+            comb_horz_16x8_2 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_3), vmovn_s32(i4_horz_res_32x4_r1_4));
+            vst1q_s16(pi2_out, comb_horz_16x8_1);
+            vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
+
+            pi2_out += i4_out_res_stride;
+            /* 7 iterations, one per pair of adjacent scratch rows, each
+             * writing two output rows. MB_SIZE is used as the scratch row
+             * pitch; the horizontal pass wrote 16 entries per row, so this
+             * assumes MB_SIZE == 16 - TODO confirm */
+            for(i4_j = 0; i4_j < 14; i4_j += 2)
+            {
+                pi4_ref_array_temp += MB_SIZE;
+                i4_horz_samp_32x4_r2_1 = vld1q_s32(pi4_ref_array_temp);
+                i4_horz_samp_32x4_r2_2 = vld1q_s32(pi4_ref_array_temp + 4);
+                i4_horz_samp_32x4_r2_3 = vld1q_s32(pi4_ref_array_temp + 8);
+                i4_horz_samp_32x4_r2_4 = vld1q_s32(pi4_ref_array_temp + 12);
+
+                horz_add_32x4_r2_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r2_1);
+                horz_add_32x4_r2_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_samp_32x4_r2_2);
+                horz_add_32x4_r2_3 = vaddq_s32(i4_horz_samp_32x4_r1_3, i4_horz_samp_32x4_r2_3);
+                horz_add_32x4_r2_4 = vaddq_s32(i4_horz_samp_32x4_r1_4, i4_horz_samp_32x4_r2_4);
+
+                i4_horz_res_32x4_r2_1 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_1, 1), horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r2_2 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_2, 1), horz_add_32x4_r2_2);
+                i4_horz_res_32x4_r2_3 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_3, 1), horz_add_32x4_r2_3);
+                i4_horz_res_32x4_r2_4 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r1_4, 1), horz_add_32x4_r2_4);
+
+                i4_horz_res_32x4_r3_1 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_1, 1), horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r3_2 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_2, 1), horz_add_32x4_r2_2);
+                i4_horz_res_32x4_r3_3 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_3, 1), horz_add_32x4_r2_3);
+                i4_horz_res_32x4_r3_4 =
+                    vaddq_s32(vshlq_n_s32(i4_horz_samp_32x4_r2_4, 1), horz_add_32x4_r2_4);
+
+                i4_horz_res_32x4_r2_1 = vrshrq_n_s32(i4_horz_res_32x4_r2_1, 4);
+                i4_horz_res_32x4_r2_2 = vrshrq_n_s32(i4_horz_res_32x4_r2_2, 4);
+                i4_horz_res_32x4_r2_3 = vrshrq_n_s32(i4_horz_res_32x4_r2_3, 4);
+                i4_horz_res_32x4_r2_4 = vrshrq_n_s32(i4_horz_res_32x4_r2_4, 4);
+
+                i4_horz_res_32x4_r3_1 = vrshrq_n_s32(i4_horz_res_32x4_r3_1, 4);
+                i4_horz_res_32x4_r3_2 = vrshrq_n_s32(i4_horz_res_32x4_r3_2, 4);
+                i4_horz_res_32x4_r3_3 = vrshrq_n_s32(i4_horz_res_32x4_r3_3, 4);
+                i4_horz_res_32x4_r3_4 = vrshrq_n_s32(i4_horz_res_32x4_r3_4, 4);
+
+                comb_horz_16x8_1 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r2_1),
+                                                vmovn_s32(i4_horz_res_32x4_r2_2));
+                comb_horz_16x8_2 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r2_3),
+                                                vmovn_s32(i4_horz_res_32x4_r2_4));
+
+                comb_horz_16x8_3 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r3_1),
+                                                vmovn_s32(i4_horz_res_32x4_r3_2));
+                comb_horz_16x8_4 = vcombine_s16(vmovn_s32(i4_horz_res_32x4_r3_3),
+                                                vmovn_s32(i4_horz_res_32x4_r3_4));
+
+                /* populate 2 samples based on current coeffs */
+                vst1q_s16(pi2_out, comb_horz_16x8_1);
+                vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
+                pi2_out += i4_out_res_stride;
+
+                vst1q_s16(pi2_out, comb_horz_16x8_3);
+                vst1q_s16(pi2_out + 8, comb_horz_16x8_4);
+                pi2_out += i4_out_res_stride;
+
+                /* store the coeff 2 to coeff 1 */
+                /* (used in next iteration)     */
+                i4_horz_samp_32x4_r1_1 = i4_horz_samp_32x4_r2_1;
+                i4_horz_samp_32x4_r1_2 = i4_horz_samp_32x4_r2_2;
+                i4_horz_samp_32x4_r1_3 = i4_horz_samp_32x4_r2_3;
+                i4_horz_samp_32x4_r1_4 = i4_horz_samp_32x4_r2_4;
+            }
+
+            /* populate the last output row from the last scratch row */
+            i4_horz_res_32x4_r1_1 = vrshrq_n_s32(i4_horz_samp_32x4_r1_1, 2);
+            i4_horz_res_32x4_r1_2 = vrshrq_n_s32(i4_horz_samp_32x4_r1_2, 2);
+            i4_horz_res_32x4_r1_3 = vrshrq_n_s32(i4_horz_samp_32x4_r1_3, 2);
+            i4_horz_res_32x4_r1_4 = vrshrq_n_s32(i4_horz_samp_32x4_r1_4, 2);
+
+            comb_horz_16x8_1 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_1), vmovn_s32(i4_horz_res_32x4_r1_2));
+            comb_horz_16x8_2 =
+                vcombine_s16(vmovn_s32(i4_horz_res_32x4_r1_3), vmovn_s32(i4_horz_res_32x4_r1_4));
+            vst1q_s16(pi2_out, comb_horz_16x8_1);
+            vst1q_s16(pi2_out + 8, comb_horz_16x8_2);
+
+            /* horizontal loop updates
+             * (neither pointer is read again before the function returns) */
+            pi4_ref_array++;
+            pi2_out_res++;
+        }
+    }
+    else
+    {
+        /* ----------------------------------------------------------------- */
+        /* LOOP over number of blocks                                        */
+        /* ----------------------------------------------------------------- */
+        for(i4_blk_ctr = 0; i4_blk_ctr < 4; i4_blk_ctr++)
+        {
+            /* if reference layer is not coded then no processing */
+            if(0 != (u4_ref_nnz & 0x1))
+            {
+                int16x8_t i2_coeff1_16x8_r0_0, i2_coeff1_16x8_r0_1;
+                int16x8_t i2_coeff1_16x8_r1_0, i2_coeff1_16x8_r1_1;
+                int16x8_t i2_coeff1_16x8_r2_0, i2_coeff1_16x8_r2_1;
+                int16x8_t i2_coeff1_16x8_r3_0, i2_coeff1_16x8_r3_1;
+                int16x8_t i2_add_16x8_r0_0;
+                int16x8_t i2_add_16x8_r1_0;
+                int16x8_t i2_add_16x8_r2_0;
+                int16x8_t i2_add_16x8_r3_0;
+                int16x8_t i2_res_16x8_r0_0, i2_res_16x8_r0_1;
+                int16x8_t i2_res_16x8_r1_0, i2_res_16x8_r1_1;
+                int16x8_t i2_res_16x8_r2_0, i2_res_16x8_r2_1;
+                int16x8_t i2_res_16x8_r3_0, i2_res_16x8_r3_1;
+                int16x4_t i4_horz_samp_16x4_r0_1, i4_horz_samp_16x4_r0_2;
+                int16x4_t i4_horz_samp_16x4_r1_1, i4_horz_samp_16x4_r1_2;
+                int16x4_t i4_horz_samp_16x4_r2_1, i4_horz_samp_16x4_r2_2;
+                int16x4_t i4_horz_samp_16x4_r3_1, i4_horz_samp_16x4_r3_2;
+                int32x4_t i4_horz_samp_32x4_r0_1, i4_horz_samp_32x4_r0_2;
+                int32x4_t i4_horz_samp_32x4_r1_1, i4_horz_samp_32x4_r1_2;
+                int32x4_t i4_horz_samp_32x4_r2_1, i4_horz_samp_32x4_r2_2;
+                int32x4_t i4_horz_samp_32x4_r3_1, i4_horz_samp_32x4_r3_2;
+                int32x4_t i4_horz_add_32x4_r1_1, i4_horz_add_32x4_r1_2;
+                int32x4_t i4_horz_add_32x4_r2_1, i4_horz_add_32x4_r2_2;
+                int32x4_t i4_horz_add_32x4_r3_1, i4_horz_add_32x4_r3_2;
+                int16x4_t i4_horz_res_16x4_r0_1, i4_horz_res_16x4_r0_2;
+                int16x4_t i4_horz_res_16x4_r1_1, i4_horz_res_16x4_r1_2;
+                int16x4_t i4_horz_res_16x4_r2_1, i4_horz_res_16x4_r2_2;
+                int16x4_t i4_horz_res_16x4_r3_1, i4_horz_res_16x4_r3_2;
+                int16x4_t i4_horz_res_16x4_r4_1, i4_horz_res_16x4_r4_2;
+                int16x4_t i4_horz_res_16x4_r5_1, i4_horz_res_16x4_r5_2;
+                int16x4_t i4_horz_res_16x4_r6_1, i4_horz_res_16x4_r6_2;
+                int16x4_t i4_horz_res_16x4_r7_1, i4_horz_res_16x4_r7_2;
+                int32x4_t i4_horz_res_32x4_r1_1, i4_horz_res_32x4_r1_2;
+                int32x4_t i4_horz_res_32x4_r2_1, i4_horz_res_32x4_r2_2;
+                int32x4_t i4_horz_res_32x4_r3_1, i4_horz_res_32x4_r3_2;
+                int32x4_t i4_horz_res_32x4_r4_1, i4_horz_res_32x4_r4_2;
+                int32x4_t i4_horz_res_32x4_r5_1, i4_horz_res_32x4_r5_2;
+                int32x4_t i4_horz_res_32x4_r6_1, i4_horz_res_32x4_r6_2;
+                int16x8x2_t ti2_res_16x8x2_r0, ti2_res_16x8x2_r1;
+                int16x8x2_t ti2_res_16x8x2_r2, ti2_res_16x8x2_r3;
+                /* Horizontal pass on 4 input rows. 8 samples are loaded per
+                 * row; the _1 vectors are the same row rotated by one lane,
+                 * so lane k of a _1 vector is the right neighbour of lane k
+                 * of the matching _0 vector for k = 0 .. 6 */
+                i2_coeff1_16x8_r0_0 = vld1q_s16(pi2_inp_data);
+                i2_coeff1_16x8_r1_0 = vld1q_s16(pi2_inp_data + i4_inp_data_stride);
+                i2_coeff1_16x8_r2_0 = vld1q_s16(pi2_inp_data + (i4_inp_data_stride << 1));
+                i2_coeff1_16x8_r3_0 =
+                    vld1q_s16(pi2_inp_data + (i4_inp_data_stride << 1) + i4_inp_data_stride);
+
+                i2_coeff1_16x8_r0_1 = vextq_s16(i2_coeff1_16x8_r0_0, i2_coeff1_16x8_r0_0, 1);
+                i2_coeff1_16x8_r1_1 = vextq_s16(i2_coeff1_16x8_r1_0, i2_coeff1_16x8_r1_0, 1);
+                i2_coeff1_16x8_r2_1 = vextq_s16(i2_coeff1_16x8_r2_0, i2_coeff1_16x8_r2_0, 1);
+                i2_coeff1_16x8_r3_1 = vextq_s16(i2_coeff1_16x8_r3_0, i2_coeff1_16x8_r3_0, 1);
+                /* a + b for neighbouring samples a, b; adding 2 * a or 2 * b
+                 * further below gives 3 * a + b and a + 3 * b */
+                i2_add_16x8_r0_0 = vaddq_s16(i2_coeff1_16x8_r0_1, i2_coeff1_16x8_r0_0);
+                i2_add_16x8_r1_0 = vaddq_s16(i2_coeff1_16x8_r1_1, i2_coeff1_16x8_r1_0);
+                i2_add_16x8_r2_0 = vaddq_s16(i2_coeff1_16x8_r2_1, i2_coeff1_16x8_r2_0);
+                i2_add_16x8_r3_0 = vaddq_s16(i2_coeff1_16x8_r3_1, i2_coeff1_16x8_r3_0);
+
+                i2_coeff1_16x8_r0_0 = vshlq_n_s16(i2_coeff1_16x8_r0_0, 1);
+                i2_coeff1_16x8_r1_0 = vshlq_n_s16(i2_coeff1_16x8_r1_0, 1);
+                i2_coeff1_16x8_r2_0 = vshlq_n_s16(i2_coeff1_16x8_r2_0, 1);
+                i2_coeff1_16x8_r3_0 = vshlq_n_s16(i2_coeff1_16x8_r3_0, 1);
+
+                i2_coeff1_16x8_r0_1 = vshlq_n_s16(i2_coeff1_16x8_r0_1, 1);
+                i2_coeff1_16x8_r1_1 = vshlq_n_s16(i2_coeff1_16x8_r1_1, 1);
+                i2_coeff1_16x8_r2_1 = vshlq_n_s16(i2_coeff1_16x8_r2_1, 1);
+                i2_coeff1_16x8_r3_1 = vshlq_n_s16(i2_coeff1_16x8_r3_1, 1);
+
+                i2_res_16x8_r0_0 = vaddq_s16(i2_coeff1_16x8_r0_0, i2_add_16x8_r0_0);
+                i2_res_16x8_r1_0 = vaddq_s16(i2_coeff1_16x8_r1_0, i2_add_16x8_r1_0);
+                i2_res_16x8_r2_0 = vaddq_s16(i2_coeff1_16x8_r2_0, i2_add_16x8_r2_0);
+                i2_res_16x8_r3_0 = vaddq_s16(i2_coeff1_16x8_r3_0, i2_add_16x8_r3_0);
+
+                i2_res_16x8_r0_1 = vaddq_s16(i2_coeff1_16x8_r0_1, i2_add_16x8_r0_0);
+                i2_res_16x8_r1_1 = vaddq_s16(i2_coeff1_16x8_r1_1, i2_add_16x8_r1_0);
+                i2_res_16x8_r2_1 = vaddq_s16(i2_coeff1_16x8_r2_1, i2_add_16x8_r2_0);
+                i2_res_16x8_r3_1 = vaddq_s16(i2_coeff1_16x8_r3_1, i2_add_16x8_r3_0);
+
+                ti2_res_16x8x2_r0 = vzipq_s16(i2_res_16x8_r0_0, i2_res_16x8_r0_1);
+                ti2_res_16x8x2_r1 = vzipq_s16(i2_res_16x8_r1_0, i2_res_16x8_r1_1);
+                ti2_res_16x8x2_r2 = vzipq_s16(i2_res_16x8_r2_0, i2_res_16x8_r2_1);
+                ti2_res_16x8x2_r3 = vzipq_s16(i2_res_16x8_r3_0, i2_res_16x8_r3_1);
+                /* second doubling: the _0 vectors now hold 4 * sample and
+                 * supply the first and last entry of each scratch row */
+                i2_coeff1_16x8_r0_0 = vshlq_n_s16(i2_coeff1_16x8_r0_0, 1);
+                i2_coeff1_16x8_r1_0 = vshlq_n_s16(i2_coeff1_16x8_r1_0, 1);
+                i2_coeff1_16x8_r2_0 = vshlq_n_s16(i2_coeff1_16x8_r2_0, 1);
+                i2_coeff1_16x8_r3_0 = vshlq_n_s16(i2_coeff1_16x8_r3_0, 1);
+                /* Scratch rows are 8 WORD16 entries here. Each 8 lane store
+                 * starts at entry 1 of its row, so its last lane lands on
+                 * entry 0 of the next row, which that row's lane 0 store
+                 * then rewrites; entries 0 and 7 get 4 * in[0] and 4 * in[3].
+                 * NOTE(review): the store at +25 reaches WORD16 index 32,
+                 * one past 4 rows of 8 - confirm the scratch buffer size */
+                vst1q_s16(pi2_refarray_buffer + 1, ti2_res_16x8x2_r0.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer, i2_coeff1_16x8_r0_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 7, i2_coeff1_16x8_r0_0, 3);
+
+                vst1q_s16(pi2_refarray_buffer + 9, ti2_res_16x8x2_r1.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer + 8, i2_coeff1_16x8_r1_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 15, i2_coeff1_16x8_r1_0, 3);
+
+                vst1q_s16(pi2_refarray_buffer + 17, ti2_res_16x8x2_r2.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer + 16, i2_coeff1_16x8_r2_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 23, i2_coeff1_16x8_r2_0, 3);
+
+                vst1q_s16(pi2_refarray_buffer + 25, ti2_res_16x8x2_r3.val[0]);
+                vst1q_lane_s16(pi2_refarray_buffer + 24, i2_coeff1_16x8_r3_0, 0);
+                vst1q_lane_s16(pi2_refarray_buffer + 31, i2_coeff1_16x8_r3_0, 3);
+
+                i4_horz_samp_16x4_r0_1 = vld1_s16(pi2_refarray_buffer);
+                i4_horz_samp_16x4_r0_2 = vld1_s16(pi2_refarray_buffer + 4);
+
+                i4_horz_samp_16x4_r1_1 = vld1_s16(pi2_refarray_buffer + 8);
+                i4_horz_samp_16x4_r1_2 = vld1_s16(pi2_refarray_buffer + 12);
+
+                i4_horz_samp_16x4_r2_1 = vld1_s16(pi2_refarray_buffer + 16);
+                i4_horz_samp_16x4_r2_2 = vld1_s16(pi2_refarray_buffer + 20);
+
+                i4_horz_samp_16x4_r3_1 = vld1_s16(pi2_refarray_buffer + 24);
+                i4_horz_samp_16x4_r3_2 = vld1_s16(pi2_refarray_buffer + 28);
+                /* Vertical pass on the 4 scratch rows h0 .. h3:
+                 *   output row 0 = (h0 + 2) >> 2, output row 7 = (h3 + 2) >> 2
+                 *   output rows 1 .. 6 = (3 * hk + hk1 + 8) >> 4 followed by
+                 *   (hk + 3 * hk1 + 8) >> 4 for each pair hk, hk1 of
+                 *   consecutive rows, computed in 32 bits and narrowed with
+                 *   saturation (vqrshrn_n_s32) */
+                i4_horz_res_16x4_r0_1 = vrshr_n_s16(i4_horz_samp_16x4_r0_1, 2);
+                i4_horz_res_16x4_r0_2 = vrshr_n_s16(i4_horz_samp_16x4_r0_2, 2);
+
+                i4_horz_add_32x4_r1_1 = vaddl_s16(i4_horz_samp_16x4_r0_1, i4_horz_samp_16x4_r1_1);
+                i4_horz_add_32x4_r1_2 = vaddl_s16(i4_horz_samp_16x4_r0_2, i4_horz_samp_16x4_r1_2);
+
+                i4_horz_add_32x4_r2_1 = vaddl_s16(i4_horz_samp_16x4_r1_1, i4_horz_samp_16x4_r2_1);
+                i4_horz_add_32x4_r2_2 = vaddl_s16(i4_horz_samp_16x4_r1_2, i4_horz_samp_16x4_r2_2);
+
+                i4_horz_add_32x4_r3_1 = vaddl_s16(i4_horz_samp_16x4_r2_1, i4_horz_samp_16x4_r3_1);
+                i4_horz_add_32x4_r3_2 = vaddl_s16(i4_horz_samp_16x4_r2_2, i4_horz_samp_16x4_r3_2);
+
+                i4_horz_samp_32x4_r0_1 = vshll_n_s16(i4_horz_samp_16x4_r0_1, 1);
+                i4_horz_samp_32x4_r0_2 = vshll_n_s16(i4_horz_samp_16x4_r0_2, 1);
+
+                i4_horz_samp_32x4_r1_1 = vshll_n_s16(i4_horz_samp_16x4_r1_1, 1);
+                i4_horz_samp_32x4_r1_2 = vshll_n_s16(i4_horz_samp_16x4_r1_2, 1);
+
+                i4_horz_samp_32x4_r2_1 = vshll_n_s16(i4_horz_samp_16x4_r2_1, 1);
+                i4_horz_samp_32x4_r2_2 = vshll_n_s16(i4_horz_samp_16x4_r2_2, 1);
+
+                i4_horz_samp_32x4_r3_1 = vshll_n_s16(i4_horz_samp_16x4_r3_1, 1);
+                i4_horz_samp_32x4_r3_2 = vshll_n_s16(i4_horz_samp_16x4_r3_2, 1);
+
+                i4_horz_res_32x4_r1_1 = vaddq_s32(i4_horz_samp_32x4_r0_1, i4_horz_add_32x4_r1_1);
+                i4_horz_res_32x4_r1_2 = vaddq_s32(i4_horz_samp_32x4_r0_2, i4_horz_add_32x4_r1_2);
+
+                i4_horz_res_32x4_r2_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_add_32x4_r1_1);
+                i4_horz_res_32x4_r2_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_add_32x4_r1_2);
+
+                i4_horz_res_32x4_r3_1 = vaddq_s32(i4_horz_samp_32x4_r1_1, i4_horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r3_2 = vaddq_s32(i4_horz_samp_32x4_r1_2, i4_horz_add_32x4_r2_2);
+
+                i4_horz_res_32x4_r4_1 = vaddq_s32(i4_horz_samp_32x4_r2_1, i4_horz_add_32x4_r2_1);
+                i4_horz_res_32x4_r4_2 = vaddq_s32(i4_horz_samp_32x4_r2_2, i4_horz_add_32x4_r2_2);
+
+                i4_horz_res_32x4_r5_1 = vaddq_s32(i4_horz_samp_32x4_r2_1, i4_horz_add_32x4_r3_1);
+                i4_horz_res_32x4_r5_2 = vaddq_s32(i4_horz_samp_32x4_r2_2, i4_horz_add_32x4_r3_2);
+
+                i4_horz_res_32x4_r6_1 = vaddq_s32(i4_horz_samp_32x4_r3_1, i4_horz_add_32x4_r3_1);
+                i4_horz_res_32x4_r6_2 = vaddq_s32(i4_horz_samp_32x4_r3_2, i4_horz_add_32x4_r3_2);
+
+                i4_horz_res_16x4_r1_1 = vqrshrn_n_s32(i4_horz_res_32x4_r1_1, 4);
+                i4_horz_res_16x4_r1_2 = vqrshrn_n_s32(i4_horz_res_32x4_r1_2, 4);
+
+                i4_horz_res_16x4_r2_1 = vqrshrn_n_s32(i4_horz_res_32x4_r2_1, 4);
+                i4_horz_res_16x4_r2_2 = vqrshrn_n_s32(i4_horz_res_32x4_r2_2, 4);
+
+                i4_horz_res_16x4_r3_1 = vqrshrn_n_s32(i4_horz_res_32x4_r3_1, 4);
+                i4_horz_res_16x4_r3_2 = vqrshrn_n_s32(i4_horz_res_32x4_r3_2, 4);
+
+                i4_horz_res_16x4_r4_1 = vqrshrn_n_s32(i4_horz_res_32x4_r4_1, 4);
+                i4_horz_res_16x4_r4_2 = vqrshrn_n_s32(i4_horz_res_32x4_r4_2, 4);
+
+                i4_horz_res_16x4_r5_1 = vqrshrn_n_s32(i4_horz_res_32x4_r5_1, 4);
+                i4_horz_res_16x4_r5_2 = vqrshrn_n_s32(i4_horz_res_32x4_r5_2, 4);
+
+                i4_horz_res_16x4_r6_1 = vqrshrn_n_s32(i4_horz_res_32x4_r6_1, 4);
+                i4_horz_res_16x4_r6_2 = vqrshrn_n_s32(i4_horz_res_32x4_r6_2, 4);
+
+                i4_horz_res_16x4_r7_1 = vrshr_n_s16(i4_horz_samp_16x4_r3_1, 2);
+                i4_horz_res_16x4_r7_2 = vrshr_n_s16(i4_horz_samp_16x4_r3_2, 2);
+
+                vst1_s16(pi2_out_res, i4_horz_res_16x4_r0_1);
+                vst1_s16(pi2_out_res + 4, i4_horz_res_16x4_r0_2);
+
+                vst1_s16(pi2_out_res + i4_out_res_stride, i4_horz_res_16x4_r1_1);
+                vst1_s16(pi2_out_res + i4_out_res_stride + 4, i4_horz_res_16x4_r1_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 1), i4_horz_res_16x4_r2_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 1) + 4, i4_horz_res_16x4_r2_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 3), i4_horz_res_16x4_r3_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 3) + 4, i4_horz_res_16x4_r3_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 2), i4_horz_res_16x4_r4_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride << 2) + 4, i4_horz_res_16x4_r4_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 5), i4_horz_res_16x4_r5_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 5) + 4, i4_horz_res_16x4_r5_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 6), i4_horz_res_16x4_r6_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 6) + 4, i4_horz_res_16x4_r6_2);
+
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 7), i4_horz_res_16x4_r7_1);
+                vst1_s16(pi2_out_res + (i4_out_res_stride * 7) + 4, i4_horz_res_16x4_r7_2);
+
+                pi2_out_res += BLK8x8SIZE;
+            }
+            else
+            {
+                pi2_out_res += BLK8x8SIZE;
+            }
+
+            /* Block level loop updates
+             * After the second block both pointers move to the start of the
+             * next row of blocks. u4_ref_nnz is shifted by 1 per block plus
+             * 2 more after the second block, so blocks 0 .. 3 test bits
+             * 0, 1, 4 and 5 of the value passed in.
+             * NOTE(review): the horizontal input step uses
+             * SUB_BLK_HEIGHT_4x4 while the rewind uses SUB_BLK_WIDTH_4x4 -
+             * correct only if the two are equal; confirm */
+            if(1 == i4_blk_ctr)
+            {
+                pi2_inp_data -= SUB_BLK_WIDTH_4x4;
+                pi2_inp_data += (i4_inp_data_stride * SUB_BLK_HEIGHT_4x4);
+                pi2_out_res -= MB_SIZE;
+                pi2_out_res += (i4_out_res_stride * BLK8x8SIZE);
+                u4_ref_nnz >>= 2;
+            }
+            else
+            {
+                pi2_inp_data += SUB_BLK_HEIGHT_4x4;
+            }
+            u4_ref_nnz >>= 1;
+        }
+        /* The above loop iterates over all the blocks */
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  SAD between source and prediction once the residual prediction is removed
+*
+* @par Description:
+*  Returns the sum of |src - pred - res| over a u4_mb_wd x u4_mb_ht block.
+*  Blocks that are 16 wide with a height that is a multiple of 8 take the NEON
+*  path, which consumes four rows at a time; every other size is handled by a
+*  scalar loop.
+*
+* @param[in] ps_src
+*  Source samples; pv_data is read as UWORD8
+*
+* @param[in] ps_pred
+*  Prediction samples; pv_data is read as UWORD8
+*
+* @param[in] ps_res
+*  Residual prediction; pv_data is read as WORD16
+*
+* @param[in] u4_mb_wd
+*  Block width
+*
+* @param[in] u4_mb_ht
+*  Block height
+*
+* @returns
+*  Accumulated SAD
+*
+*******************************************************************************
+*/
+UWORD32 isvce_get_sad_with_residual_pred_neon(buffer_container_t *ps_src,
+                                              buffer_container_t *ps_pred,
+                                              buffer_container_t *ps_res, UWORD32 u4_mb_wd,
+                                              UWORD32 u4_mb_ht)
+{
+    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
+    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    const WORD32 i4_src_stride = ps_src->i4_data_stride;
+    const WORD32 i4_pred_stride = ps_pred->i4_data_stride;
+    const WORD32 i4_res_stride = ps_res->i4_data_stride;
+    UWORD32 u4_sad = 0;
+    UWORD32 u4_row, u4_col;
+
+    if((16 != u4_mb_wd) || (0 != (u4_mb_ht % 8)))
+    {
+        /* Generic sizes: one sample at a time */
+        for(u4_row = 0; u4_row < u4_mb_ht; u4_row++)
+        {
+            for(u4_col = 0; u4_col < u4_mb_wd; u4_col++)
+            {
+                WORD16 i2_src = pu1_src[u4_col + u4_row * i4_src_stride];
+                WORD16 i2_pred = pu1_pred[u4_col + u4_row * i4_pred_stride];
+                WORD16 i2_res = pi2_res[u4_col + u4_row * i4_res_stride];
+
+                u4_sad += ABS(i2_src - i2_pred - i2_res);
+            }
+        }
+    }
+    else
+    {
+        /* Every 8 rows are covered by two groups of 4 rows */
+        const UWORD32 u4_num_row_groups = (u4_mb_ht / 8) * 2;
+        UWORD32 u4_group;
+
+        for(u4_group = 0; u4_group < u4_num_row_groups; u4_group++)
+        {
+            /* Per lane 16-bit running sum of the 8 half rows of this group */
+            int16x8_t i2_abs_sum_16x8 = vdupq_n_s16(0);
+            int32x4_t i4_abs_sum_32x4;
+            int32x2_t i4_abs_sum_32x2;
+
+            for(u4_row = 0; u4_row < 4; u4_row++)
+            {
+                /* left and right 8 sample halves of the 16 wide row */
+                for(u4_col = 0; u4_col < 16; u4_col += 8)
+                {
+                    uint8x8_t u1_src_8x8 = vld1_u8(pu1_src + u4_col);
+                    uint8x8_t u1_pred_8x8 = vld1_u8(pu1_pred + u4_col);
+                    int16x8_t i2_res_16x8 = vld1q_s16(pi2_res + u4_col);
+                    int16x8_t i2_diff_16x8 =
+                        vreinterpretq_s16_u16(vsubl_u8(u1_src_8x8, u1_pred_8x8));
+
+                    i2_diff_16x8 = vabsq_s16(vsubq_s16(i2_diff_16x8, i2_res_16x8));
+                    i2_abs_sum_16x8 = vaddq_s16(i2_abs_sum_16x8, i2_diff_16x8);
+                }
+
+                pu1_src += i4_src_stride;
+                pu1_pred += i4_pred_stride;
+                pi2_res += i4_res_stride;
+            }
+
+            /* Widen to 32 bits and fold the lanes into the running SAD */
+            i4_abs_sum_32x4 = vpaddlq_s16(i2_abs_sum_16x8);
+            i4_abs_sum_32x2 =
+                vpadd_s32(vget_low_s32(i4_abs_sum_32x4), vget_high_s32(i4_abs_sum_32x4));
+
+            u4_sad += vget_lane_s32(i4_abs_sum_32x2, 0) + vget_lane_s32(i4_abs_sum_32x2, 1);
+        }
+    }
+
+    return u4_sad;
+}
--- a/encoder/irc_rate_control_api_structs.h
+++ b/encoder/irc_rate_control_api_structs.h
@ -16,7 +16,7 @@
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-*/
+ */

 #ifndef _RATE_CONTROL_API_STRUCTS_H_
 #define _RATE_CONTROL_API_STRUCTS_H_
@ -74,7 +74,9 @@ typedef struct rate_control_api_t

    UWORD8 u1_is_first_frm;

-    UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)];
+    UWORD8 au1_min_max_qp[MAX_PIC_TYPE * 2];
+
+    UWORD8 au1_min_max_avc_qp[MAX_PIC_TYPE * 2];

    WORD32 i4_prev_frm_est_bits;

@ -89,5 +91,4 @@ typedef struct rate_control_api_t

 } rate_control_api_t;

-#endif/*_RATE_CONTROL_API_STRUCTS_H_*/
-
+#endif /*_RATE_CONTROL_API_STRUCTS_H_*/
--- a/encoder/riscv/svc/isvce_function_selector.c
+++ b/encoder/riscv/svc/isvce_function_selector.c
@ -0,0 +1,80 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in svc
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include "iv2.h"
+#include "isvce_structs.h"
+
+/**
+*******************************************************************************
+*
+* @brief Populates the function pointers of the codec context
+*
+* @par Description: No architecture specific selection is performed in this
+* file; the request is forwarded unchanged to
+* isvce_init_function_ptr_generic()
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr(isvce_codec_t *ps_codec)
+{
+    /* Only the generic initializer is used here */
+    isvce_init_function_ptr_generic(ps_codec);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Reports the default architecture of the encoder executing environment
+*
+* @par Description: This implementation does not probe the environment; it
+* always reports ARCH_NA
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  ARCH_NA
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T isvce_default_arch(void)
+{
+    const IV_ARCH_T e_default_arch = ARCH_NA;
+
+    return e_default_arch;
+}
--- a/encoder/riscv/svc/isvce_platform_macros.h
+++ b/encoder/riscv/svc/isvce_platform_macros.h
@ -0,0 +1,103 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+ *******************************************************************************
+ * @file
+ *  isvce_platform_macros.h
+ *
+ * @brief
+ *  Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVCE_PLATFORM_MACROS_H_
+#define _ISVCE_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (generic variant)
+*
+* @par Description: declaration only; the definition is not part of this
+* header. NOTE(review): presumably installs the architecture independent
+* implementations - confirm against the definition
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the routine initializes the function pointers of the
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr(isvce_codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the environment
+* in which the current encoder is being run
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T isvce_default_arch(void);
+
+#endif
--- a/encoder/svc/irc_svc_rate_control_api.c
+++ b/encoder/svc/irc_svc_rate_control_api.c
@ -0,0 +1,116 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "irc_trace_support.h"
+
+/* Smaller / larger of two values; note that each argument is evaluated twice */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+#define DEV_Q 4        /* Q format (shift) for deviation range factor */
+#define HI_DEV_FCTR 22 /* 1.4 * 16 (= 22.4), truncated */
+#define LO_DEV_FCTR 12 /* 0.75 * 16 */
+/* Scale Qprev by the high / low deviation factor in Q(DEV_Q) with rounding. */
+/* The argument is parenthesised so that the cast applies to the complete    */
+/* expression passed by the caller and not only to its first operand         */
+#define GET_HI_DEV_QP(Qprev) ((((WORD32) (Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define GET_LO_DEV_QP(Qprev) ((((WORD32) (Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+/* Clamp Qc to the closed interval [lo_d, hi_d] */
+#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? ((lo_d)) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
+
+/*******************************************************************************
+ *  Description   : Gets the frame level qp for the given picture type
+ *                  based on bits per pixel and gradient per pixel
+ *
+ *  Inputs        : ps_rate_control_api - rate control context; supplies the
+ *                                        {min, max} qp pair of e_pic_type from
+ *                                        au1_min_max_avc_qp
+ *                  e_rc_type           - rate control mode
+ *                  e_pic_type          - picture type, used as index of the
+ *                                        {min, max} pair
+ *                  d_bpp               - bits per pixel
+ *                  d_gpp               - gradient per pixel
+ *
+ *  Returns       : qp clamped to [min, max] and rounded to nearest integer;
+ *                  0 when e_rc_type is not one of the supported modes
+ *
+ *  Remarks       : The parameter type matches the prototype published in
+ *                  irc_svc_rate_control_api.h
+ ******************************************************************************/
+/* Get frame level QP based on BPP and GPP */
+UWORD8 irc_get_frame_level_init_qp(rate_control_api_t *ps_rate_control_api, rc_type_e e_rc_type,
+                                   picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp)
+{
+    DOUBLE d_frame_qp;
+
+    /* Entry (2 * pic_type) holds the min qp, entry (2 * pic_type + 1) the max qp */
+    UWORD8 u1_min_qp = ps_rate_control_api->au1_min_max_avc_qp[(e_pic_type << 1)];
+    UWORD8 u1_max_qp = ps_rate_control_api->au1_min_max_avc_qp[(e_pic_type << 1) + 1];
+
+    if((e_rc_type != VBR_STORAGE) && (e_rc_type != VBR_STORAGE_DVD_COMP) &&
+       (e_rc_type != CBR_NLDRC) && (e_rc_type != CONST_QP) && (e_rc_type != VBR_STREAMING))
+    {
+        trace_printf((const WORD8 *) " Only VBR,NLDRC and CONST QP supported for now \n");
+        return (0);
+    }
+
+    /* Piecewise linear model; the segment is selected by bits per pixel */
+    if(d_bpp <= 0.18)
+    {
+        d_frame_qp = 43.49 + (0.59 * d_gpp) - (106.45 * d_bpp);
+    }
+    else if(d_bpp <= 0.6)
+    {
+        d_frame_qp = 25.12 + (0.69 * d_gpp) - (29.23 * (d_bpp - 0.18));
+    }
+    else
+    {
+        d_frame_qp = 13.93 + (0.74 * d_gpp) - (18.4 * (d_bpp - 0.6));
+    }
+
+    /* Truncating the QP to the Max and Min Qp values possible */
+    if(d_frame_qp < u1_min_qp) d_frame_qp = u1_min_qp;
+    if(d_frame_qp > u1_max_qp) d_frame_qp = u1_max_qp;
+
+    /* Round to nearest before narrowing */
+    return ((UWORD8) (d_frame_qp + 0.5));
+}
+
+/* Copies the caller supplied {min, max} qp pair of every picture type into    */
+/* both qp constraint tables (au1_min_max_qp and au1_min_max_avc_qp) of the    */
+/* rate control context                                                        */
+void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp,
+                               UWORD8 *pu1_min_max_avc_qp)
+{
+    UWORD8 *pu1_dst_qp = ps_rate_control_api->au1_min_max_qp;
+    UWORD8 *pu1_dst_avc_qp = ps_rate_control_api->au1_min_max_avc_qp;
+    WORD32 i4_pic_type = 0;
+
+    while(i4_pic_type < MAX_PIC_TYPE)
+    {
+        /* Each picture type owns two consecutive entries */
+        const WORD32 i4_first = (i4_pic_type << 1);
+        const WORD32 i4_second = i4_first + 1;
+
+        pu1_dst_qp[i4_first] = pu1_min_max_qp[i4_first];
+        pu1_dst_qp[i4_second] = pu1_min_max_qp[i4_second];
+        pu1_dst_avc_qp[i4_first] = pu1_min_max_avc_qp[i4_first];
+        pu1_dst_avc_qp[i4_second] = pu1_min_max_avc_qp[i4_second];
+
+        i4_pic_type++;
+    }
+}
+
+/* Returns the u1_scd_detected field of the rate control context */
+UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api)
+{
+    const UWORD8 u1_scd_detected = ps_rate_control_api->u1_scd_detected;
+
+    return u1_scd_detected;
+}
--- a/encoder/svc/irc_svc_rate_control_api.h
+++ b/encoder/svc/irc_svc_rate_control_api.h
@ -0,0 +1,46 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#ifndef _IRC_SVC_RATE_CONTROL_API_H_
+#define _IRC_SVC_RATE_CONTROL_API_H_
+
+/* Dependencies of 'irc_rate_control_api_structs' */
+#include "irc_picture_type.h"
+#include "irc_rd_model.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_est_sad.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_common.h"
+
+#include "irc_rate_control_api_structs.h"
+
+/* Get frame level QP based on BPP and GPP. The result is limited to the      */
+/* {min, max} pair stored for e_pic_type in au1_min_max_avc_qp                */
+UWORD8 irc_get_frame_level_init_qp(rate_control_api_t *ps_rate_control_api, rc_type_e e_rc_type,
+                                   picture_type_e e_pic_type, DOUBLE d_bpp, DOUBLE d_gpp);
+
+/* Replace the contents of au1_min_max_qp and au1_min_max_avc_qp; each input  */
+/* array supplies 2 entries per picture type                                  */
+void irc_change_qp_constraints(rate_control_api_t *ps_rate_control_api, UWORD8 *pu1_min_max_qp,
+                               UWORD8 *pu1_min_max_avc_qp);
+
+/* Returns the u1_scd_detected field of the rate control context */
+extern UWORD8 irc_is_scenecut(rate_control_api_t *ps_rate_control_api);
+
+#endif
--- a/encoder/svc/isvce.h
+++ b/encoder/svc/isvce.h
--- a/encoder/svc/isvce_api.c
+++ b/encoder/svc/isvce_api.c
--- a/encoder/svc/isvce_cabac.c
+++ b/encoder/svc/isvce_cabac.c
@ -0,0 +1,753 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_cabac.c
+*
+* @brief
+*  Contains all leaf level functions for CABAC entropy coding.
+*
+*
+* @author
+* Doney Alex
+*
+* @par List of Functions:
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_macros.h"
+#include "isvc_defs.h"
+#include "isvce_defs.h"
+#include "isvc_macros.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "isvc_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_cabac_tables.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "isvce_cabac.h"
+#include "isvce_encode_header.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
+ *   unary/ k-th order Exp-Golomb  (UEGk) binarization process,
+ *   where k = 0 as defined in 9.3.2.3 of  ITU_T_H264-201402
+ *
+ * @param[in] i2_sufs
+ *  Suffix value to be binarized
+ *
+ * @param[out] pi1_bins_len
+ *  Receives the length of the generated bin string
+ *
+ * @returns Binarized value
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len)
+{
+    UWORD32 u4_value = i2_sufs + 1;
+
+    /* number of unary bins, including the terminating '0' bin */
+    WORD32 i4_unary_len = (32 - CLZ(u4_value) + (0 == u4_value));
+
+    /* number of bins that follow the unary part */
+    WORD32 i4_info_len = i4_unary_len - 1;
+
+    /* (i4_unary_len - 1) '1's followed by a single '0' */
+    UWORD32 u4_unary_part = (1 << i4_unary_len) - 2;
+
+    /* low order bits of the value that are placed after the unary part */
+    UWORD32 u4_info_part = u4_value & ((1 << i4_info_len) - 1);
+
+    /* length of the code = 2 * (unary length - 1) + 1 + k, with k = 0 */
+    *pi1_bins_len = (2 * i4_unary_len) - 1;
+
+    return ((u4_unary_part << i4_info_len) | u4_info_part);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Get cabac context for the MB: sets up the pointers to the current, top and
+ *  left cabac neighbor context depending upon neighbor availability.
+ *
+ * @par Description
+ *  A neighbor is treated as available when the current MB is not in the first
+ *  column (left) / first row (top) and the neighbor carries the same slice
+ *  index as the current MB. An unavailable neighbor is pointed at the default
+ *  context entry and the corresponding left / top state is reset.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @param[in] u4_mb_type
+ *  Type of MB; selects the default csbp values used for missing neighbors
+ *
+ * @returns none
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type)
+{
+    /* CABAC context */
+    isvce_cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+    isvce_mb_info_ctxt_t *ps_ctx_inc_mb_map;
+    cab_csbp_t *ps_lft_csbp;
+
+    WORD32 i4_lft_avail, i4_top_avail, i4_is_intra;
+    WORD32 i4_mb_x, i4_mb_y;
+    UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
+
+    i4_is_intra = ((u4_mb_type == I16x16) || (u4_mb_type == I8x8) || (u4_mb_type == I4x4));
+
+    /* derive neighbor availability */
+    i4_mb_x = ps_ent_ctxt->i4_mb_x;
+    i4_mb_y = ps_ent_ctxt->i4_mb_y;
+    /* move to the slice index entries of the current MB row */
+    pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs);
+    /* left macroblock availability */
+    i4_lft_avail = (i4_mb_x == 0 || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? 0 : 1;
+    /* top macroblock availability; the first row is rejected before the entry */
+    /* of the row above is read                                                 */
+    i4_top_avail = (i4_mb_y == 0 ||
+                    (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))
+                       ? 0
+                       : 1;
+
+    /* start with both neighbors pointing at the default context entry */
+    ps_ctx_inc_mb_map = ps_cabac_ctxt->ps_mb_map_ctxt_inc;
+    ps_cabac_ctxt->ps_curr_ctxt_mb_info = ps_ctx_inc_mb_map + i4_mb_x;
+    ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info;
+    ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info;
+    ps_lft_csbp = ps_cabac_ctxt->ps_lft_csbp;
+    ps_cabac_ctxt->pu1_left_y_ac_csbp = &ps_lft_csbp->u1_y_ac_csbp_top_mb;
+    ps_cabac_ctxt->pu1_left_uv_ac_csbp = &ps_lft_csbp->u1_uv_ac_csbp_top_mb;
+    ps_cabac_ctxt->pu1_left_yuv_dc_csbp = &ps_lft_csbp->u1_yuv_dc_csbp_top_mb;
+    ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc = &ps_cabac_ctxt->i1_left_ref_idx_ctx_inc_arr[0][0];
+    ps_cabac_ctxt->pu1_left_mv_ctxt_inc = ps_cabac_ctxt->u1_left_mv_ctxt_inc_arr[0];
+
+    /* the left neighbor is the previous entry of the per column map */
+    if(i4_lft_avail) ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info - 1;
+    /* the top neighbor shares the map entry of the current MB column          */
+    /* NOTE(review): presumably the entry still holds the MB above until the   */
+    /* current MB overwrites it - confirm                                      */
+    if(i4_top_avail) ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+    if(!i4_lft_avail)
+    {
+        UWORD8 u1_def_csbp = i4_is_intra ? 0xf : 0;
+        *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = u1_def_csbp;
+        *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = u1_def_csbp;
+        *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = u1_def_csbp;
+        /* clear the same four bytes the former 32 bit store cleared, without  */
+        /* accessing the byte array through an incompatible pointer type       */
+        memset(ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc, 0, sizeof(UWORD32));
+        memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+    }
+    if(!i4_top_avail)
+    {
+        UWORD8 u1_def_csbp = i4_is_intra ? 0xff : 0;
+        ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_ac_csbp = u1_def_csbp;
+        ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_dc_csbp = u1_def_csbp;
+        ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[0] =
+            ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[1] =
+                ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[2] =
+                    ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[3] = 0;
+        memset(ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv, 0, 16);
+    }
+}
+
+/**
+ *******************************************************************************
+ * @brief
+ *  flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
+ *
+ * @par Description
+ *  1. Adds a pending carry to the byte written last
+ *  2. Writes all outstanding bytes (0x00 when a carry occurred, 0xFF otherwise)
+ *  3. Writes the remaining bits of low followed by the stop bit and padding
+ *  4. Stores the stream offset and resets the scratch state of the bitstream
+ *     context
+ *
+ *  @param[in]   ps_cabac_ctxt
+ *  pointer to cabac context (handle)
+ *
+ * @returns  none
+ *
+ * @remarks
+ *  The carry handling only looks at or modifies bytes that have already been
+ *  written, i.e. the stream offset is range checked in the same way as in
+ *  isvce_cabac_put_byte()
+ *
+ *******************************************************************************
+ */
+void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt)
+{
+    /* bit stream ptr */
+    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
+    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
+    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+    UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
+    UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
+    UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
+    WORD32 zero_run = ps_stream->i4_zero_bytes_run;
+    UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;
+
+    /************************************************************************/
+    /* Insert the carry (propagated in previous byte) along with            */
+    /* outstanding bytes (if any) and flush remaining bits                  */
+    /************************************************************************/
+    {
+        /* carry = 1 => putbit(1); carry propagated due to L renorm */
+        WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1;
+        WORD32 last_byte;
+        WORD32 bits_left;
+        WORD32 rem_bits;
+
+        if(carry)
+        {
+            /* CORNER CASE: if the previous data is 0x000003, then EPB will be
+             inserted and the data will become 0x00000303 and if the carry is present,
+             it will be added with the last byte and it will become 0x00000304 which
+             is not correct as per standard */
+            /* so check for previous four bytes and if it is equal to 0x00000303
+             then subtract u4_strm_buf_offset by 1 */
+            /* the offset check keeps the four byte look back inside the buffer */
+            if((u4_strm_buf_offset > 3) && (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00))
+            {
+                u4_strm_buf_offset -= 1;
+            }
+
+            /* previous byte carry add will not result in overflow to        */
+            /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes  */
+            /* nothing can be updated when no byte has been written so far   */
+            if(u4_strm_buf_offset > 0)
+            {
+                pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+                zero_run = 0;
+            }
+        }
+
+        /*        Insert outstanding bytes (if any)         */
+        while(u4_out_standing_bytes)
+        {
+            UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+            u4_out_standing_bytes--;
+        }
+
+        /*  clear the carry in low */
+        u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1);
+
+        /* extract the remaining bits;                                   */
+        /* includes additional msb bit of low as per Figure 9-12      */
+        bits_left = u4_bits_gen + 1;
+        rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left));
+
+        if(bits_left >= 8)
+        {
+            last_byte = (rem_bits >> (bits_left - 8)) & 0xFF;
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
+            bits_left -= 8;
+        }
+
+        /* insert last byte along with rbsp stop bit(1) and 0's in the end */
+        last_byte =
+            (rem_bits << (8 - bits_left)) | (1 << (7 - bits_left) | (1 << (7 - bits_left - 1)));
+        last_byte &= 0xFF;
+        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
+
+        /* update the state variables and return success */
+        ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
+        ps_stream->i4_zero_bytes_run = 0;
+        /* Default init values for scratch variables of bitstream context */
+        ps_stream->u4_cur_word = 0;
+        ps_stream->i4_bits_left_in_cw = WORD_SIZE;
+    }
+}
+
+/**
+ ******************************************************************************
+ *
+ *  @brief Puts new byte (and outstanding bytes) into bitstream after cabac
+ *         renormalization
+ *
+ *  @par   Description
+ *  1. Extract the leading byte of low(L)
+ *  2. If leading byte=0xff increment outstanding bytes and return
+ *     (as the actual bits depend on carry propagation later)
+ *  3. If leading byte is not 0xff check for any carry propagation
+ *  4. Insert the carry (propagated in previous byte) along with outstanding
+ *     bytes (if any) and leading byte
+ *
+ *
+ *  @param[in]   ps_cabac_ctxt
+ *  pointer to cabac context (handle)
+ *
+ *  @return none
+ *
+ ******************************************************************************
+ */
+void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt)
+{
+    /* bit stream ptr */
+    bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
+    encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
+    UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+    UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
+    /* leading byte of low; bit 8 of this value (if set) is the carry */
+    WORD32 lead_byte = u4_low >> (u4_bits_gen + CABAC_BITS - 8);
+
+    /* Sanity checks */
+    ASSERT((ps_cab_enc_env->u4_code_int_range >= 256) && (ps_cab_enc_env->u4_code_int_range < 512));
+    ASSERT((u4_bits_gen >= 8));
+
+    /* update bits generated and low after extracting leading byte */
+    u4_bits_gen -= 8;
+    ps_cab_enc_env->u4_code_int_low &= ((1 << (CABAC_BITS + u4_bits_gen)) - 1);
+    ps_cab_enc_env->u4_bits_gen = u4_bits_gen;
+
+    /************************************************************************/
+    /* 1. Extract the leading byte of low(L)                                */
+    /* 2. If leading byte=0xff increment outstanding bytes and return       */
+    /*      (as the actual bits depend on carry propagation later)          */
+    /* 3. If leading byte is not 0xff check for any carry propagation       */
+    /* 4. Insert the carry (propagated in previous byte) along with         */
+    /*    outstanding bytes (if any) and leading byte                       */
+    /************************************************************************/
+    if(lead_byte == 0xff)
+    {
+        /* actual bits depend on carry propagation     */
+        ps_cab_enc_env->u4_out_standing_bytes++;
+        return;
+    }
+    else
+    {
+        UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
+        UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
+        /* carry = 1 => putbit(1); carry propagated due to L renorm */
+        WORD32 carry = (lead_byte >> 8) & 0x1;
+        WORD32 zero_run = ps_stream->i4_zero_bytes_run;
+        UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;
+
+        /*********************************************************************/
+        /*        Insert the carry propagated in previous byte               */
+        /*                                                                   */
+        /* Note : Do not worry about corruption into slice header align byte */
+        /*        This is because the first bin cannot result in overflow    */
+        /*********************************************************************/
+        if(carry)
+        {
+            /* CORNER CASE: if the previous data is 0x000003, then EPB will be
+             inserted and the data will become 0x00000303 and if the carry is present,
+             it will be added with the last byte and it will become 0x00000304 which
+             is not correct as per standard */
+            /* so check for previous four bytes and if it is equal to 0x00000303
+             then subtract u4_strm_buf_offset by 1 */
+            /* the offset check keeps the four byte look back inside the buffer */
+            if((u4_strm_buf_offset > 3) && (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00) &&
+               (pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00))
+            {
+                u4_strm_buf_offset -= 1;
+            }
+
+            /* previous byte carry add will not result in overflow to        */
+            /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes  */
+            if(u4_strm_buf_offset > 0)
+            {
+                pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+                zero_run = 0;
+            }
+        }
+
+        /*        Insert outstanding bytes (if any)         */
+        /* deferred 0xff bytes become 0x00 when a carry rippled through them */
+        while(u4_out_standing_bytes)
+        {
+            UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+
+            u4_out_standing_bytes--;
+        }
+        ps_cab_enc_env->u4_out_standing_bytes = 0;
+
+        /*        Insert the leading byte                   */
+        /* drop the carry bit; only the low 8 bits are written */
+        lead_byte &= 0xFF;
+        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, lead_byte, zero_run);
+
+        /* update the state variables and return success */
+        ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
+        ps_stream->i4_zero_bytes_run = zero_run;
+    }
+}
+
+/**
+******************************************************************************
+*
+*  @brief Encodes one bin with the given (probability state, mps) context model
+*
+*  @par   Description
+*  1. The context model is updated as per the state transition table
+*  2. Range and low are updated for the mps / lps path and renormalized
+*  3. isvce_cabac_put_byte() is invoked when more than CABAC_BITS bits are
+*     pending
+*
+*  @param[in]   ps_cabac
+*  pointer to cabac context (handle)
+*
+*  @param[in]   bin
+*  bin(boolean) to be encoded
+*
+*  @param[in,out]  pu1_bin_ctxts
+*  pointer to the cabac context model containing pState[bits 5-0] | MPS[bit6]
+*
+*  @return none
+*
+******************************************************************************
+*/
+void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin, bin_ctxt_model *pu1_bin_ctxts)
+{
+    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
+    UWORD32 u4_range = ps_env->u4_code_int_range;
+    UWORD32 u4_low = ps_env->u4_code_int_low;
+    UWORD8 u1_pstate = (*pu1_bin_ctxts) & 0x3F;
+    UWORD8 u1_mps = !!((*pu1_bin_ctxts) & (0x40));
+    UWORD32 u4_lut_entry;
+    UWORD32 u4_lps_range;
+    WORD32 i4_renorm_shift;
+
+    /* Sanity checks */
+    ASSERT((bin == 0) || (bin == 1));
+    ASSERT((u4_range >= 256) && (u4_range < 512));
+
+    /* Look up the lps range for the current state and quantized range */
+    u4_lut_entry = gau4_isvc_cabac_table[u1_pstate][(u4_range >> 6) & 0x3];
+    u4_lps_range = u4_lut_entry & 0xFF;
+    u4_range -= u4_lps_range;
+
+    if(u1_mps != bin)
+    {
+        /* lps path;  L = L + R; R = RLPS */
+        u4_low += u4_range;
+        u4_range = u4_lps_range;
+
+        /* the mps value is swapped when an lps is coded in state 0 */
+        if(0 == u1_pstate)
+        {
+            u1_mps = !u1_mps;
+        }
+
+        /* next state after an lps, taken from the same table entry */
+        u1_pstate = (u4_lut_entry >> 15) & 0x3F;
+    }
+    else
+    {
+        /* mps path; next state after an mps, taken from the same table entry */
+        u1_pstate = (u4_lut_entry >> 8) & 0x3F;
+    }
+
+    /* write back the packed context model */
+    (*pu1_bin_ctxts) = (u1_mps << 6) | u1_pstate;
+
+    /*****************************************************************/
+    /* Renormalization; calculate bits generated based on range(R)   */
+    /* Note : 6 <= R < 512; R is 2 only for terminating encode       */
+    /*****************************************************************/
+    GETRANGE(i4_renorm_shift, u4_range);
+    i4_renorm_shift = 9 - i4_renorm_shift;
+    u4_low <<= i4_renorm_shift;
+    u4_range <<= i4_renorm_shift;
+
+    /* store the updated coding interval and the count of pending bits */
+    ps_env->u4_bits_gen += i4_renorm_shift;
+    ps_env->u4_code_int_range = u4_range;
+    ps_env->u4_code_int_low = u4_low;
+
+    /* generate stream when a byte is ready */
+    if(ps_env->u4_bits_gen > CABAC_BITS)
+    {
+        isvce_cabac_put_byte(ps_cabac);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Encodes the bins of one symbol as context-coded decisions (encoding process
+*  of a decision as defined in 9.3.4.2, flowchart Figure 9-7 of
+*  ITU_T_H264-201402). Each bin is passed to isvce_cabac_encode_bin().
+*
+* @param[in] u4_bins
+*  Bin values packed one per bit; bin0 is bit 0 and is encoded first
+*
+* @param[in] i1_bins_len
+*  Number of bins to encode, maximum 32
+*
+* @param[in] u4_ctx_inc
+*  CtxInc values packed 4 bits per bin; bits 3-0 for bin0, bits 7-4 for bin1 ..
+*
+* @param[in] i1_valid_len
+*  u4_ctx_inc advances to the next 4-bit field only for the first i1_valid_len
+*  bins; every later bin reuses the CtxInc that is current at that point
+*
+* @param[in] pu1_bin_ctxt_type
+*  Pointer to the first context model; CtxInc is added to it for each bin
+*
+* @param[inout] ps_cabac
+*  Pointer to cabac_context_structure
+*
+* @returns none
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc,
+                                WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type,
+                                isvce_cabac_ctxt_t *ps_cabac)
+{
+    WORD8 i1_bin_idx = 0;
+
+    while(i1_bin_idx < i1_bins_len)
+    {
+        /* lowest bit / lowest nibble describe the bin coded in this pass */
+        const UWORD8 u1_cur_bin = (u4_bins & 0x01);
+        const UWORD8 u1_cur_ctx_inc = u4_ctx_inc & 0x0f;
+
+        /* Encode the bin */
+        isvce_cabac_encode_bin(ps_cabac, u1_cur_bin, pu1_bin_ctxt_type + u1_cur_ctx_inc);
+
+        u4_bins >>= 1;
+
+        /* past the valid length the same CtxInc keeps being used */
+        if(i1_bin_idx < i1_valid_len)
+        {
+            u4_ctx_inc >>= 4;
+        }
+
+        i1_bin_idx++;
+    }
+}
+
+/**
+ *******************************************************************************
+ * @brief
+ *  Encodes the terminate bin (encoding process before termination,
+ *  9.3.4.5 of ITU_T_H264-201402, flowchart 9-11). When term_bin is set the
+ *  encoder is flushed through isvce_cabac_flush() after the bin is coded.
+ *
+ * @param[inout] ps_cabac
+ *  Pointer to cabac structure
+ *
+ * @param[in] term_bin
+ *  Symbol value, end of slice or not, term_bin is binary
+ *
+ * @returns none
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin)
+{
+    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
+
+    /* the terminate bin always reserves a range of 2 for term_bin = 1 */
+    const UWORD32 u4_term_range = 2;
+    UWORD32 u4_low = ps_env->u4_code_int_low;
+    UWORD32 u4_range = ps_env->u4_code_int_range;
+    WORD32 i4_renorm_bits;
+
+    /* Sanity checks */
+    ASSERT((u4_range >= 256) && (u4_range < 512));
+    ASSERT((term_bin == 0) || (term_bin == 1));
+
+    u4_range -= u4_term_range;
+
+    if(term_bin)
+    {
+        /* terminating; L = L + R; R = 2 */
+        u4_low += u4_range;
+        u4_range = u4_term_range;
+    }
+
+    /*****************************************************************/
+    /* Renormalization; the left shift is 9 minus the value that     */
+    /* GETRANGE reports for the updated range                        */
+    /*****************************************************************/
+    GETRANGE(i4_renorm_bits, u4_range);
+    i4_renorm_bits = 9 - i4_renorm_bits;
+
+    /* store the renormalized state before any byte is emitted */
+    ps_env->u4_code_int_low = u4_low << i4_renorm_bits;
+    ps_env->u4_code_int_range = u4_range << i4_renorm_bits;
+    ps_env->u4_bits_gen += i4_renorm_bits;
+
+    /* generate stream when a byte is ready */
+    if(ps_env->u4_bits_gen > CABAC_BITS)
+    {
+        isvce_cabac_put_byte(ps_cabac);
+    }
+
+    /* the flush has to follow the byte output above */
+    if(term_bin)
+    {
+        isvce_cabac_flush(ps_cabac);
+    }
+}
+
+/**
+ *******************************************************************************
+ * @brief
+ *  Bypass encoding process for a single binary decision (9.3.4.4 of
+ *  ITU_T_H264-201402, flowchart 9-10). Low is doubled and, for bin = 1,
+ *  increased by the range; the range itself is not modified.
+ *
+ *  @param[inout]  ps_cabac : pointer to cabac context (handle)
+ *
+ *  @param[in]   bin :  bypass bin(0/1) to be encoded
+ *
+ *  @returns none
+ *
+ *  @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin)
+{
+    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
+
+    UWORD32 u4_range = ps_env->u4_code_int_range;
+    UWORD32 u4_new_low = ps_env->u4_code_int_low << 1;
+
+    /* Sanity checks */
+    ASSERT((u4_range >= 256) && (u4_range < 512));
+    ASSERT((bin == 0) || (bin == 1));
+
+    /* L = (L << 1) + (bin ? R : 0) */
+    if(bin)
+    {
+        u4_new_low += u4_range;
+    }
+
+    ps_env->u4_code_int_low = u4_new_low;
+
+    /* exactly one bit is produced per bypass bin */
+    ps_env->u4_bits_gen++;
+
+    /* generate stream when a byte is ready */
+    if(ps_env->u4_bits_gen > CABAC_BITS)
+    {
+        isvce_cabac_put_byte(ps_cabac);
+    }
+}
+
+/**
+******************************************************************************
+*
+*  @brief Encodes a series of bypass bins (FLC bypass bins)
+*
+*  @par   Description
+*  Alternative to calling isvce_cabac_encode_bypass_bin() in a loop: the bins
+*  are folded into cabac low up to 8 at a time, most significant bins first,
+*  and the stream is updated after each group when a byte is ready
+*
+*  @param[inout]ps_cabac
+*   pointer to cabac context (handle)
+*
+*  @param[in]   u4_bins
+*   syntax element to be coded (as FLC bins)
+*
+*  @param[in]   num_bins
+*   FLC length of u4_bins, 1 to 32
+*
+*  @return none
+*
+******************************************************************************
+*/
+
+void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins)
+{
+    encoding_envirnoment_t *ps_env = &(ps_cabac->s_cab_enc_env);
+
+    /* range is sampled once; it is not re-read after bytes are emitted */
+    const UWORD32 u4_range = ps_env->u4_code_int_range;
+    WORD32 i4_chunk;
+
+    /* Sanity checks */
+    ASSERT((num_bins < 33) && (num_bins > 0));
+    ASSERT((u4_range >= 256) && (u4_range < 512));
+
+    /* Full groups of 8 bins, taken from the top of the remaining bins */
+    while(num_bins > 8)
+    {
+        num_bins -= 8;
+        i4_chunk = (u4_bins >> (num_bins)) & 0xff;
+
+        /*  L = (L << 8) +  (R * chunk) */
+        ps_env->u4_code_int_low = (ps_env->u4_code_int_low << 8) + (i4_chunk * u4_range);
+        ps_env->u4_bits_gen += 8;
+
+        /*  insert the leading byte of low into stream */
+        if(ps_env->u4_bits_gen > CABAC_BITS)
+        {
+            isvce_cabac_put_byte(ps_cabac);
+        }
+    }
+
+    /* Remaining 1 to 8 bins; L = (L << num_bins) + (R * chunk) */
+    i4_chunk = (u4_bins & ((1 << num_bins) - 1));
+
+    ps_env->u4_code_int_low = (ps_env->u4_code_int_low << num_bins) + (i4_chunk * u4_range);
+    ps_env->u4_bits_gen += num_bins;
+
+    /*  insert the leading byte of low into stream */
+    if(ps_env->u4_bits_gen > CABAC_BITS)
+    {
+        isvce_cabac_put_byte(ps_cabac);
+    }
+}
--- a/encoder/svc/isvce_cabac.h
+++ b/encoder/svc/isvce_cabac.h
@ -0,0 +1,380 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+ *******************************************************************************
+ * @file
+ *  isvce_cabac.h
+ *
+ * @brief
+ *  This file contains cabac related macros, enums, tables and function
+ *declarations.
+ *
+ * @author
+ *  Doney Alex
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVCE_CABAC_H_
+#define _ISVCE_CABAC_H_
+
+#include "ih264e_cabac.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_defs.h"
+#include "isvce_structs.h"
+
+/*****************************************************************************/
+/* Function Declarations                                                 */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Initialize default context values and pointers.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Initialize cabac context: Initialize all contexts with init values given in
+ * the spec. Called at the beginning of entropy coding of each slice for CABAC
+ * encoding.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+extern void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
+ *   unary/ k-th order Exp-Golomb  (UEGk) binarization process,
+ *   where k = 0 as defined in 9.3.2.3 of  ITU_T_H264-201402
+ *
+ * @param[in] i2_sufs
+ *  Suffix bit string
+ *
+ * @param[in] pi1_bins_len
+ *  Pointer to length of the string
+ *
+ * @returns Binarized value
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+UWORD32 isvce_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Get cabac context for the MB :calculates the pointers to Top and   left
+ *          cabac neighbor context depending upon neighbor  availability.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @param[in] u4_mb_type
+ *  Type of MB
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_get_cabac_context(isvce_entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type);
+
+/**
+ *******************************************************************************
+ * @brief
+ *  flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
+ *
+ *  @param[in]   ps_cabac_ctxt
+ *  pointer to cabac context (handle)
+ *
+ * @returns  none
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_cabac_flush(isvce_cabac_ctxt_t *ps_cabac_ctxt);
+
+/**
+ ******************************************************************************
+ *
+ *  @brief Puts new byte (and outstanding bytes) into bitstream after cabac
+ *         renormalization
+ *
+ *  @par   Description
+ *  1. Extract the leading byte of low(L)
+ *  2. If leading byte=0xff increment outstanding bytes and return
+ *     (as the actual bits depend on carry propagation later)
+ *  3. If leading byte is not 0xff check for any carry propagation
+ *  4. Insert the carry (propagated in previous byte) along with outstanding
+ *     bytes (if any) and leading byte
+ *
+ *
+ *  @param[inout]   ps_cabac_ctxt
+ *  pointer to cabac context (handle)
+ *
+ *  @return
+ *
+ ******************************************************************************
+ */
+void isvce_cabac_put_byte(isvce_cabac_ctxt_t *ps_cabac_ctxt);
+
+/**
+ ******************************************************************************
+ *
+ *  @brief Codes a bin based on probability and mps packed context model
+ *
+ *  @par   Description
+ *  1. Apart from encoding bin, context model is updated as per state transition
+ *  2. Range and Low renormalization is done based on bin and original state
+ *  3. After renorm bitstream is updated (if required)
+ *
+ *  @param[inout]   ps_cabac
+ *  pointer to cabac context (handle)
+ *
+ *  @param[in]   bin
+ *  bin(boolean) to be encoded
+ *
+ *  @param[in]  pu1_bin_ctxts
+ *  index of cabac context model containing pState[bits 5-0] | MPS[bit6]
+ *
+ *  @return
+ *
+ ******************************************************************************
+ */
+void isvce_cabac_encode_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin,
+                            bin_ctxt_model *pu1_bin_ctxts);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Encoding process for a binary decision :implements encoding process of a
+ decision
+ *  as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol.
+ Implements
+ *  flowchart Figure 9-7( ITU_T_H264-201402)
+ *
+ * @param[in] u4_bins
+ * array of bin values
+ *
+ * @param[in] i1_bins_len
+ *  Length of bins, maximum 32
+ *
+ * @param[in] u4_ctx_inc
+ *  CtxInc packed 4 bits per bin: bits 3-0 for bin0, bits 7-4 for bin1 ..
+ *
+ * @param[in] i1_valid_len
+ *  valid length of bins, after that CtxInc is constant
+ *
+ * @param[in] pu1_bin_ctxt_type
+ *  Pointer to binary contexts
+
+ * @param[in] ps_cabac
+ *  Pointer to cabac_context_structure
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, UWORD32 u4_ctx_inc,
+                                WORD8 i1_valid_len, bin_ctxt_model *pu1_bin_ctxt_type,
+                                isvce_cabac_ctxt_t *ps_cabac);
+
+/**
+ *******************************************************************************
+ * @brief
+ *  Encoding process for a binary decision before termination:Encoding process
+ *  of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
+ *
+ * @param[in] ps_cabac
+ *  Pointer to cabac structure
+ *
+ * @param[in] term_bin
+ *  Symbol value, end of slice or not, term_bin is binary
+ *
+ * @returns
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_cabac_encode_terminate(isvce_cabac_ctxt_t *ps_cabac, WORD32 term_bin);
+
+/**
+ *******************************************************************************
+ * @brief
+ * Bypass encoding process for binary decisions:  Explained (9.3.4.4
+ *:ITU_T_H264-201402) , flowchart 9-10.
+ *
+ *  @param[in]  ps_cabac : pointer to cabac context (handle)
+ *
+ *  @param[in]   bin :  bypass bin(0/1) to be encoded
+ *
+ *  @returns
+ *
+ *  @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+void isvce_cabac_encode_bypass_bin(isvce_cabac_ctxt_t *ps_cabac, WORD32 bin);
+
+/**
+ ******************************************************************************
+ *
+ *  @brief Encodes a series of bypass bins (FLC bypass bins)
+ *
+ *  @par   Description
+ *  This function is more optimal than calling isvce_cabac_encode_bypass_bin()
+ *  in a loop as cabac low, renorm and generating the stream (8bins at a time)
+ *  can be done in one operation
+ *
+ *  @param[inout]ps_cabac
+ *   pointer to cabac context (handle)
+ *
+ *  @param[in]   u4_bins
+ *   syntax element to be coded (as FLC bins)
+ *
+ *  @param[in]   num_bins
+ *   This is the FLC length for u4_sym
+ *
+ *  @return
+ *
+ ******************************************************************************
+ */
+
+void isvce_cabac_encode_bypass_bins(isvce_cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function generates CABAC coded bit stream for an Intra Slice.
+ *
+ * @description
+ *  The mb syntax layer for intra slices constitutes luma mb mode, luma sub
+ *modes (if present), mb qp delta, coded block pattern, chroma mb mode and
+ *  luma/chroma residue. These syntax elements are written as directed by table
+ *  7.3.5 of h264 specification.
+ *
+ * @param[in] ps_ent_ctxt
+ *  pointer to entropy context
+ *
+ * @returns error code
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T isvce_write_islice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function generates CABAC coded bit stream for Inter slices
+ *
+ * @description
+ *  The mb syntax layer for inter slices constitutes luma mb mode, luma sub
+ *modes (if present), mb qp delta, coded block pattern, chroma mb mode and
+ *  luma/chroma residue. These syntax elements are written as directed by table
+ *  7.3.5 of h264 specification
+ *
+ * @param[in] ps_ent_ctxt
+ *  pointer to entropy context
+ *
+ * @returns error code
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T isvce_write_pslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function generates CABAC coded bit stream for B slices
+ *
+ * @description
+ *  The mb syntax layer for inter slices constitutes luma mb mode,
+ *  mb qp delta, coded block pattern, chroma mb mode and
+ *  luma/chroma residue. These syntax elements are written as directed by table
+ *  7.3.5 of h264 specification
+ *
+ * @param[in] ps_ent_ctxt
+ *  pointer to entropy context
+ *
+ * @returns error code
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T isvce_write_bslice_mb_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+#if ENABLE_RE_ENC_AS_SKIP
+IH264E_ERROR_T isvce_reencode_as_skip_frame_cabac(isvce_entropy_ctxt_t *ps_ent_ctxt);
+#endif
+
+#endif
--- a/encoder/svc/isvce_cabac_encode.c
+++ b/encoder/svc/isvce_cabac_encode.c
--- a/encoder/svc/isvce_cabac_init.c
+++ b/encoder/svc/isvce_cabac_init.c
@ -0,0 +1,215 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+* isvce_cabac_init.c
+*
+* @brief
+*  Contains all initialization functions for cabac contexts
+*
+* @author
+*  Doney Alex
+*
+* @par List of Functions:
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "isvc_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "isvc_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_platform_macros.h"
+#include "isvc_macros.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "isvc_common_tables.h"
+#include "isvc_cabac_tables.h"
+#include "ih264_list.h"
+#include "isvce_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "isvce_cabac.h"
+#include "isvce_process.h"
+#include "ithread.h"
+#include "isvce_encode_header.h"
+#include "isvce_globals.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264_cavlc_tables.h"
+#include "isvce_deblk.h"
+#include "isvce_me.h"
+#include "ih264e_debug.h"
+#include "ih264e_master.h"
+#include "isvce_utils.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_platform_macros.h"
+#include "ime_statistics.h"
+
+/*****************************************************************************/
+/*  Function definitions .                                                   */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Resets the cabac arithmetic encoder state: low = 0, range = 0x1fe, and no
+ *  generated bits or outstanding bytes pending
+ *
+ * @param[out] ps_cab_enc_env
+ *  Pointer to encoding_envirnoment_t structure
+ *
+ * @returns none
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+static void isvce_init_cabac_enc_envirnoment(encoding_envirnoment_t *ps_cab_enc_env)
+{
+    /* interval registers */
+    ps_cab_enc_env->u4_code_int_range = 0x1fe;
+    ps_cab_enc_env->u4_code_int_low = 0;
+
+    /* output bookkeeping */
+    ps_cab_enc_env->u4_bits_gen = 0;
+    ps_cab_enc_env->u4_out_standing_bytes = 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets up the cabac context pointers and the default MB info entry (called
+ *  once at the beginning of encoding).
+ *
+ * @description
+ *  Entry 0 of the MB info map (ps_mb_map_ctxt_inc_base) is filled with the
+ *  default values that stand for an unavailable MB; ps_mb_map_ctxt_inc is set
+ *  to the entry that follows it.
+ *
+ * @param[inout] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @returns none
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_init_cabac_table(isvce_entropy_ctxt_t *ps_ent_ctxt)
+{
+    isvce_cabac_ctxt_t *ps_cabac = ps_ent_ctxt->ps_cabac;
+
+    /* 0th entry of the map holds the 'MB not available' defaults */
+    isvce_mb_info_ctxt_t *ps_default_mb = ps_cabac->ps_mb_map_ctxt_inc_base;
+
+    /* pointer wiring */
+    ps_cabac->ps_bitstrm = ps_ent_ctxt->ps_bitstrm;
+    ps_cabac->ps_lft_csbp = &ps_cabac->s_lft_csbp;
+    ps_cabac->ps_mb_map_ctxt_inc = ps_default_mb + 1;
+    ps_cabac->ps_def_ctxt_mb_info = ps_default_mb;
+
+    /* default values for a neighbour that is not available */
+    ps_default_mb->u1_mb_type = CAB_SKIP;
+    ps_default_mb->u1_cbp = 0x0f;
+    ps_default_mb->u1_intrapred_chroma_mode = 0;
+    ps_default_mb->u1_base_mode_flag = 0;
+
+    memset(ps_default_mb->i1_ref_idx, 0, sizeof(ps_default_mb->i1_ref_idx));
+    memset(ps_default_mb->u1_mv, 0, sizeof(ps_default_mb->u1_mv));
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Initialize cabac context: resets the arithmetic encoder state and loads all
+ *  context models from the init table. Called at the beginning of entropy
+ *  coding of each slice for CABAC encoding.
+ *
+ * @description
+ *  The init table is selected by an index and the slice qp. I slices always
+ *  use index 3; every other slice type uses i1_cabac_init_idc from the slice
+ *  header.
+ *
+ * @param[inout] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @param[in] ps_slice_hdr
+ *  Pointer to the slice header supplying slice type, slice qp and
+ *  cabac_init_idc
+ *
+ * @returns none
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void isvce_init_cabac_ctxt(isvce_entropy_ctxt_t *ps_ent_ctxt, slice_header_t *ps_slice_hdr)
+{
+    isvce_cabac_ctxt_t *ps_cabac = ps_ent_ctxt->ps_cabac;
+
+    const UWORD8 u1_slice_type = ps_slice_hdr->u1_slice_type;
+    const UWORD8 u1_slice_qp = ps_slice_hdr->i1_slice_qp;
+    const WORD8 i1_table_idx = (ISLICE == u1_slice_type) ? 3 : ps_slice_hdr->i1_cabac_init_idc;
+
+    /* fresh arithmetic encoder state for the slice */
+    isvce_init_cabac_enc_envirnoment(&ps_cabac->s_cab_enc_env);
+    ps_cabac->i1_prevps_mb_qp_delta_ctxt = 0;
+
+    /* load every context model for this (table index, qp) pair */
+    memcpy(ps_cabac->au1_cabac_ctxt_table,
+           gau1_isvc_cabac_ctxt_init_table[i1_table_idx][u1_slice_qp],
+           NUM_SVC_CABAC_CTXTS * sizeof(bin_ctxt_model));
+}
--- a/encoder/svc/isvce_cabac_structs.h
+++ b/encoder/svc/isvce_cabac_structs.h
@ -0,0 +1,142 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+ *******************************************************************************
+ * @file
+ *  isvce_cabac_structs.h
+ *
+ * @brief
+ *  This file contains cabac related structure definitions.
+ *
+ * @author
+ *  Doney Alex
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVCE_CABAC_STRUCTS_H_
+#define _ISVCE_CABAC_STRUCTS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_cabac_tables.h"
+#include "ih264e_bitstream.h"
+#include "ih264e_cabac_structs.h"
+
+/**
+ ******************************************************************************
+ *  @brief      MB info for cabac: per-MB values kept so that later MBs can
+ *              derive their context increments from their neighbours
+ ******************************************************************************
+ */
+typedef struct isvce_mb_info_ctxt_t
+{
+    /* Neighbour availability Variables needed to get CtxtInc, for CABAC */
+    UWORD8 u1_mb_type; /* !< macroblock type: I/P/B/SI/SP */
+
+    UWORD8 u1_cbp; /* !< Coded Block Pattern */
+    /* Intra chroma prediction mode of the MB */
+    UWORD8 u1_intrapred_chroma_mode;
+
+    /*************************************************************************/
+    /*               Arrangement of AC CSBP                                  */
+    /*        bits:  b7 b6 b5 b4 b3 b2 b1 b0                                 */
+    /*        CSBP:  V1 V0 U1 U0 Y3 Y2 Y1 Y0                                 */
+    /*************************************************************************/
+    UWORD8 u1_yuv_ac_csbp;
+    /*************************************************************************/
+    /*               Arrangement of DC CSBP                                  */
+    /*        bits:  b7  b6  b5  b4  b3  b2  b1  b0                          */
+    /*        CSBP:   x   x   x   x   x  Vdc Udc Ydc                         */
+    /*************************************************************************/
+    UWORD8 u1_yuv_dc_csbp;
+
+    /* Reference index and mv context data; both are zeroed in the default */
+    /* ('MB not available') entry by isvce_init_cabac_table()               */
+    WORD8 i1_ref_idx[4];
+    UWORD8 u1_mv[4][4];
+
+    /* base_mode_flag of the MB; the left and top entries are read when the */
+    /* base mode flag of the current MB is encoded                          */
+    UWORD8 u1_base_mode_flag;
+} isvce_mb_info_ctxt_t;
+
+/**
+ ******************************************************************************
+ *  @brief      CABAC Context structure : Variables to handle Cabac
+ ******************************************************************************
+ */
+typedef struct isvce_cabac_ctxt_t
+{
+    /* Table of all the cabac context models; reloaded from the init table */
+    /* by isvce_init_cabac_ctxt() at the start of each slice                */
+    bin_ctxt_model au1_cabac_ctxt_table[NUM_SVC_CABAC_CTXTS];
+
+    /* Left csbp storage (embedded); ps_lft_csbp below is set to point here */
+    cab_csbp_t s_lft_csbp;
+
+    /**
+     * pointer to Bitstream structure
+     */
+    bitstrm_t *ps_bitstrm;
+
+    /* Pointer to mb_info_ctxt_t map_base; entry 0 holds the default */
+    /* ('MB not available') values                                    */
+    isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc_base;
+
+    /* Arithmetic encoder state (embedded encoding_envirnoment_t): low, */
+    /* range, bits generated and outstanding bytes                      */
+    encoding_envirnoment_t s_cab_enc_env;
+
+    /* These things need to be updated at each MbLevel */
+
+    /* Prev ps_mb_qp_delta_ctxt; reset to 0 at the start of each slice */
+    WORD8 i1_prevps_mb_qp_delta_ctxt;
+
+    /* Pointer to mb_info_ctxt_t map; set to ps_mb_map_ctxt_inc_base + 1 */
+    isvce_mb_info_ctxt_t *ps_mb_map_ctxt_inc;
+
+    /* Pointer to default mb_info_ctxt_t (entry 0 of the map) */
+    isvce_mb_info_ctxt_t *ps_def_ctxt_mb_info;
+
+    /* Pointer to current mb_info_ctxt_t */
+    isvce_mb_info_ctxt_t *ps_curr_ctxt_mb_info;
+
+    /* Pointer to left mb_info_ctxt_t */
+    isvce_mb_info_ctxt_t *ps_left_ctxt_mb_info;
+
+    /* Pointer to top mb_info_ctxt_t  */
+    isvce_mb_info_ctxt_t *ps_top_ctxt_mb_info;
+
+    /* Pointer to left csbp structure */
+    cab_csbp_t *ps_lft_csbp;
+    UWORD8 *pu1_left_y_ac_csbp;
+    UWORD8 *pu1_left_uv_ac_csbp;
+    UWORD8 *pu1_left_yuv_dc_csbp;
+
+    /***************************************************************************/
+    /*       Ref_idx contexts  are stored in the following way                 */
+    /*  Array Idx 0,1 for reference indices in Forward direction               */
+    /*  Array Idx 2,3 for reference indices in backward direction              */
+    /***************************************************************************/
+    /* Dimensions for i1_left_ref_idx_ctx_inc_arr is [2][4] for Mbaff:Top and  */
+    /* Bot                                                                     */
+    WORD8 i1_left_ref_idx_ctx_inc_arr[2][4];
+    WORD8 *pi1_left_ref_idx_ctxt_inc;
+
+    /* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */
+    UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4];
+    UWORD8 (*pu1_left_mv_ctxt_inc)[4];
+
+} isvce_cabac_ctxt_t;
+
+#endif
--- a/encoder/svc/isvce_cabac_utils.h
+++ b/encoder/svc/isvce_cabac_utils.h
@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_cabac_utils.h
+*
+* @brief
+*  Contains inline helpers that CABAC-encode the SVC-specific syntax elements
+*  base_mode_flag, residual_prediction_flag and motion_prediction_flag
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_CABAC_UTILS_H_
+#define _ISVCE_CABAC_UTILS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_macros.h"
+#include "isvc_defs.h"
+#include "isvc_cabac_tables.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_cabac.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*  CABAC-encodes the base_mode_flag bin of the current macroblock
+*
+* @par Description:
+*  The context increment is the number of neighbours (left, top) whose stored
+*  u1_base_mode_flag is zero. The bin is coded with the context model located
+*  at index BASE_MODE_FLAG + increment of the cabac context table.
+*
+* @param[in] ps_cabac_ctxt
+*  pointer to cabac context; its left and top mb info entries are read
+*
+* @param[in] u1_base_mode_flag
+*  bin value to be encoded
+*
+* @returns none
+*
+*******************************************************************************
+*/
+static FORCEINLINE void isvce_cabac_enc_base_mode_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
+                                                       UWORD8 u1_base_mode_flag)
+{
+    UWORD32 u4_ctx_idx = BASE_MODE_FLAG;
+
+    /* one step for every neighbour that does not use base mode */
+    if(!ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_base_mode_flag)
+    {
+        u4_ctx_idx++;
+    }
+
+    if(!ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_base_mode_flag)
+    {
+        u4_ctx_idx++;
+    }
+
+    isvce_cabac_encode_bin(ps_cabac_ctxt, u1_base_mode_flag,
+                           &ps_cabac_ctxt->au1_cabac_ctxt_table[u4_ctx_idx]);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  CABAC-encodes the residual_prediction_flag bin of the current macroblock
+*
+* @par Description:
+*  The bin is coded with the context model at index RESIDUAL_PREDICTION_FLAG
+*  when u1_base_mode_flag is non-zero, and with the next model otherwise.
+*
+* @param[in] ps_cabac_ctxt
+*  pointer to cabac context
+*
+* @param[in] u1_base_mode_flag
+*  base mode flag of the current macroblock; selects the context model
+*
+* @param[in] u1_residual_prediction_flag
+*  bin value to be encoded
+*
+* @returns none
+*
+*******************************************************************************
+*/
+static FORCEINLINE void isvce_cabac_enc_residual_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
+                                                                 UWORD8 u1_base_mode_flag,
+                                                                 UWORD8 u1_residual_prediction_flag)
+{
+    UWORD32 u4_ctx_idx = RESIDUAL_PREDICTION_FLAG;
+
+    if(!u1_base_mode_flag)
+    {
+        u4_ctx_idx++;
+    }
+
+    isvce_cabac_encode_bin(ps_cabac_ctxt, u1_residual_prediction_flag,
+                           &ps_cabac_ctxt->au1_cabac_ctxt_table[u4_ctx_idx]);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  CABAC-encodes a motion_prediction_flag bin
+*
+* @par Description:
+*  A single context model is used per list: MOTION_PREDICTION_FLAG_L0 when
+*  u1_is_l0_mvp is non-zero, MOTION_PREDICTION_FLAG_L1 otherwise.
+*
+* @param[in] ps_cabac_ctxt
+*  pointer to cabac context
+*
+* @param[in] u1_motion_prediction_flag
+*  bin value to be encoded
+*
+* @param[in] u1_is_l0_mvp
+*  non-zero selects the L0 context model, zero selects the L1 context model
+*
+* @returns none
+*
+*******************************************************************************
+*/
+static FORCEINLINE void isvce_cabac_enc_motion_prediction_flag(isvce_cabac_ctxt_t *ps_cabac_ctxt,
+                                                               UWORD8 u1_motion_prediction_flag,
+                                                               UWORD8 u1_is_l0_mvp)
+{
+    UWORD32 u4_ctx_idx;
+
+    if(u1_is_l0_mvp)
+    {
+        u4_ctx_idx = MOTION_PREDICTION_FLAG_L0;
+    }
+    else
+    {
+        u4_ctx_idx = MOTION_PREDICTION_FLAG_L1;
+    }
+
+    isvce_cabac_encode_bin(ps_cabac_ctxt, u1_motion_prediction_flag,
+                           &ps_cabac_ctxt->au1_cabac_ctxt_table[u4_ctx_idx]);
+}
+
+#endif
--- a/encoder/svc/isvce_cavlc.c
+++ b/encoder/svc/isvce_cavlc.c
--- a/encoder/svc/isvce_cavlc.h
+++ b/encoder/svc/isvce_cavlc.h
@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_cavlc.h
+*
+* @brief
+*  This file contains declarations of the CAVLC macroblock-level bitstream
+*  writers for I, P and B slices
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef _ISVCE_CAVLC_H_
+#define _ISVCE_CAVLC_H_
+
+#include "ih264_typedefs.h"
+#include "isvce_defs.h"
+#include "isvce_structs.h"
+
+/*****************************************************************************/
+/* Function macro definitions                                                */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for an Intra Slice.
+*
+* @description
+*  The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue. These syntax elements are written as directed by table
+*  7.3.5 of h264 specification.
+*
+* @param[in] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_write_islice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for Inter slices
+*
+* @description
+*  The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue. These syntax elements are written as directed by table
+*  7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_write_pslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for Inter(B) slices
+*
+* @description
+*  The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue. These syntax elements are written as directed by table
+*  7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_write_bslice_mb_cavlc(isvce_entropy_ctxt_t *ps_ent_ctxt);
+
+#if ENABLE_RE_ENC_AS_SKIP
+IH264E_ERROR_T isvce_reencode_as_skip_frame_cavlc(isvce_entropy_ctxt_t *ps_entropy);
+#endif
+
+#endif
--- a/encoder/svc/isvce_core_coding.c
+++ b/encoder/svc/isvce_core_coding.c
--- a/encoder/svc/isvce_core_coding.h
+++ b/encoder/svc/isvce_core_coding.h
@ -0,0 +1,125 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_core_coding.h
+*
+* @brief
+*  This file contains extern declarations of core coding routines
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef _ISVCE_CORE_CODING_H_
+#define _ISVCE_CORE_CODING_H_
+
+#include "isvce_structs.h"
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*  @brief      Enable/Disable Hadamard transform of DC Coeff's
+******************************************************************************
+*/
+#define DISABLE_DC_TRANSFORM 0
+#define ENABLE_DC_TRANSFORM 1
+
+/**
+*******************************************************************************
+*  @brief bit masks for DC and AC control flags
+*******************************************************************************
+*/
+
+#define DC_COEFF_CNT_LUMA_MB 16
+#define NUM_4X4_BLKS_LUMA_MB_ROW 4
+#define NUM_LUMA4x4_BLOCKS_IN_MB 16
+#define NUM_CHROMA4x4_BLOCKS_IN_MB 8
+
+#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4
+#define SIZE_4X4_BLK_VERT TRANS_SIZE_4
+
+/* Luma: DC mask covers bits 15..0, AC mask covers bits 31..16 */
+#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF
+#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000
+
+/* Chroma U: AC mask covers bits 31..28, DC mask covers bits 15..12 */
+#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000
+
+/* Chroma V: AC mask covers bits 27..24, DC mask covers bits 11..8 */
+#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00
+
+/* Combined U and V masks */
+#define CNTRL_FLAG_AC_MASK_CHROMA (CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V)
+#define CNTRL_FLAG_DC_MASK_CHROMA (CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V)
+
+/* Covers bits 15..14 */
+#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
+
+/**
+*******************************************************************************
+*  @brief macros for transforms
+*******************************************************************************
+*/
+/* Stores CLZ(u4_cntrl) in blk_lin_id, then masks u4_cntrl with                */
+/* (0x7FFFFFFF >> blk_lin_id), which clears bit (31 - blk_lin_id) and every    */
+/* bit above it. Both arguments are written, so they must be lvalues.          */
+/* NOTE(review): presumably CLZ counts the leading zero bits of a 32-bit word, */
+/* and u4_cntrl is expected to be non-zero on entry - confirm against CLZ.     */
+#define DEQUEUE_BLKID_FROM_CONTROL(u4_cntrl, blk_lin_id) \
+    {                                                    \
+        blk_lin_id = CLZ(u4_cntrl);                      \
+        u4_cntrl &= (0x7FFFFFFF >> blk_lin_id);          \
+    };
+
+/* Converts a linear 4x4 block index into offsets on a grid that is four      */
+/* blocks wide: i4_offset_x = (id % 4) * 4 and i4_offset_y = (id / 4) * 4.    */
+/* Every argument is parenthesised so that expression arguments (for example  */
+/* 'a + b') expand with the intended precedence. u4_blk_id is evaluated       */
+/* twice, so it must not have side effects.                                   */
+#define IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y) \
+    {                                                        \
+        (i4_offset_x) = ((u4_blk_id) % 4) << 2;              \
+        (i4_offset_y) = ((u4_blk_id) / 4) << 2;              \
+    }
+
+#define IS_V_BLK(u4_blk_id) ((u4_blk_id) > 3)
+
+/* Converts a chroma 4x4 block index into offsets:                            */
+/*   i4_offset_x = 8 * (id & 1) + IS_V_BLK(id)                                */
+/*   i4_offset_y = 4 * ((id & 2) >> 1)                                        */
+/* NOTE(review): the 8-wide step and the +1 for V blocks presumably address   */
+/* interleaved UV samples - confirm against the callers.                      */
+/* Every argument is parenthesised so that expression arguments (for example  */
+/* 'a | b') expand with the intended precedence. u4_blk_id is evaluated more  */
+/* than once, so it must not have side effects.                               */
+#define IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)            \
+    {                                                                     \
+        (i4_offset_x) = (((u4_blk_id) & 0x1) << 3) + IS_V_BLK(u4_blk_id); \
+        (i4_offset_y) = ((u4_blk_id) & 0x2) << 1;                         \
+    }
+
+/* Typedefs */
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+extern FT_CORE_CODING isvce_code_luma_intra_macroblock_16x16;
+
+extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4;
+
+extern FT_CORE_CODING isvce_code_luma_intra_macroblock_4x4_rdopt_on;
+
+extern FT_CORE_CODING isvce_code_chroma_intra_macroblock_8x8;
+
+extern FT_CORE_CODING isvce_code_luma_inter_macroblock_16x16;
+
+extern FT_CORE_CODING isvce_code_chroma_inter_macroblock_8x8;
+
+#endif
--- a/encoder/svc/isvce_deblk.c
+++ b/encoder/svc/isvce_deblk.c
--- a/encoder/svc/isvce_deblk.h
+++ b/encoder/svc/isvce_deblk.h
@ -0,0 +1,53 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_deblk.h
+*
+* @brief
+*  This file contains extern declarations of deblocking routines
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef _ISVCE_DEBLK_H_
+#define _ISVCE_DEBLK_H_
+
+#include "ih264_typedefs.h"
+#include "isvce_structs.h"
+
+#define CSBP_LEFT_BLOCK_MASK 0x1111
+#define CSBP_RIGHT_BLOCK_MASK 0x8888
+
+#define NUM_EDGES_IN_MB 4
+
+extern void isvce_compute_bs(isvce_process_ctxt_t *ps_proc, UWORD8 u1_inter_layer_deblk_flag);
+
+extern void isvce_deblock_mb(isvce_process_ctxt_t *ps_proc, isvce_deblk_ctxt_t *ps_deblk,
+                             UWORD8 u1_inter_layer_deblk_flag);
+
+#endif
--- a/encoder/svc/isvce_defs.h
+++ b/encoder/svc/isvce_defs.h
@ -0,0 +1,345 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_defs.h
+*
+* @brief
+*  Definitions used in the encoder
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_DEFS_H_
+#define _ISVCE_DEFS_H_
+
+#include "ih264e_defs.h"
+
+#define SVC_MAX_NUM_BFRAMES 0
+
+#define DEFAULT_INIT_QP 1
+
+#define SVC_MAX_NUM_INP_FRAMES ((SVC_MAX_NUM_BFRAMES) + 2)
+
+#define LOG2_MAX_FRAME_NUM_MINUS4 12
+
+#define ENC_MAX_PU_IN_MB ((MB_SIZE / ENC_MIN_PU_SIZE) * (MB_SIZE / ENC_MIN_PU_SIZE))
+
+#define MAX_REF_FRAMES_PER_PRED_DIR 1
+
+#define SVC_MAX_SLICE_HDR_CNT 1
+
+#define MAX_LAYER_REFERENCE_PICS 1
+
+#define ENABLE_RESIDUAL_PREDICTION 1
+
+#define ENABLE_ILP_MV 1
+
+#define USE_ILP_MV_IN_ME (1 && (ENABLE_ILP_MV))
+
+#define USE_ILP_MV_AS_MVP (1 && (ENABLE_ILP_MV))
+
+#define MAX_MVP_IDX (USE_ILP_MV_AS_MVP ? 1 : 0)
+
+#define ENABLE_IBL_MODE 1
+
+#define ENABLE_INTRA_BASE_DEBLOCK (0 && (ENABLE_IBL_MODE))
+
+#define ENABLE_MODE_STAT_VISUALISER 0
+
+#define FORCE_FAST_INTRA4X4 0
+
+#define FORCE_DISTORTION_BASED_INTRA_4X4_GATING 1
+
+#define ENABLE_INTRA16X16_BASED_INTRA4X4_GATING 0
+
+#define ENABLE_ILP_BASED_INTRA4X4_GATING 0
+
+#define DISABLE_POST_ENC_SKIP 1
+
+#define ENABLE_RE_ENC_AS_SKIP 1
+
+#define MAX_ILP_MV_IN_NBR_RGN 4
+
+/* L, T, TL, TR, Zero, Skip, 'Temporal Skip', ILP */
+#define MAX_FPEL_SEARCH_CANDIDATES (7 + MAX_PU_IN_MB + MAX_ILP_MV_IN_NBR_RGN)
+
+#define NUM_SVCE_RC_MEMTABS 45
+
+#define SVCE_MAX_INP_DIM 1920
+
+#define SVCE_MAX_INP_FRAME_SIZE (1920 * 1088)
+
+/**
+ ***************************************************************************
+ * Enum listing the various mem records being requested
+ ****************************************************************************
+ */
+typedef enum ISVCE_MEMREC_TYPES_T
+{
+    /**
+     * Codec Object at API level
+     */
+    ISVCE_MEM_REC_IV_OBJ,
+
+    /**
+     * Codec context
+     */
+    ISVCE_MEM_REC_CODEC,
+
+    /**
+     * Cabac context
+     */
+    ISVCE_MEM_REC_CABAC,
+
+    /**
+     * Cabac context_mb_info
+     */
+    ISVCE_MEM_REC_CABAC_MB_INFO,
+
+    /**
+     * entropy context
+     */
+    ISVCE_MEM_REC_ENTROPY,
+
+    /**
+     * Buffer to hold coeff data
+     */
+    ISVCE_MEM_REC_MB_COEFF_DATA,
+
+    /**
+     * Buffer to hold MB header data
+     */
+    ISVCE_MEM_REC_MB_HEADER_DATA,
+
+    /**
+     * Motion vector bank
+     */
+    ISVCE_MEM_REC_MVBANK,
+
+    /**
+     * Motion vector bits
+     */
+    ISVCE_MEM_REC_MVBITS,
+
+    /**
+     * Holds mem records passed to the codec.
+     */
+    ISVCE_MEM_REC_BACKUP,
+
+    /**
+     * Holds SPS
+     */
+    ISVCE_MEM_REC_SPS,
+
+    /**
+     * Holds PPS
+     */
+    ISVCE_MEM_REC_PPS,
+
+    /**
+     * Holds SVC NALU Extension data
+     */
+    ISVCE_MEM_REC_SVC_NALU_EXT,
+
+    /**
+     * Holds subset SPS data
+     */
+    ISVCE_MEM_REC_SUBSET_SPS,
+
+    /**
+     * Holds Slice Headers
+     */
+    ISVCE_MEM_REC_SLICE_HDR,
+
+    /**
+     * Holds SVC Slice Headers
+     */
+    ISVCE_MEM_REC_SVC_SLICE_HDR,
+
+    /**
+     * Contains map indicating slice index per MB basis
+     */
+    ISVCE_MEM_REC_SLICE_MAP,
+
+    /**
+     * Holds thread handles
+     */
+    ISVCE_MEM_REC_THREAD_HANDLE,
+
+    /**
+     * Holds control call mutex
+     */
+    ISVCE_MEM_REC_CTL_MUTEX,
+
+    /**
+     * Holds entropy call mutex
+     */
+    ISVCE_MEM_REC_ENTROPY_MUTEX,
+
+    /**
+     * Holds memory for Process JOB Queue
+     */
+    ISVCE_MEM_REC_PROC_JOBQ,
+
+    /**
+     * Holds memory for Entropy JOB Queue
+     */
+    ISVCE_MEM_REC_ENTROPY_JOBQ,
+
+    /**
+     * Contains status map indicating processing status per MB basis
+     */
+    ISVCE_MEM_REC_PROC_MAP,
+
+    /**
+     * Contains status map indicating deblocking status per MB basis
+     */
+    ISVCE_MEM_REC_DBLK_MAP,
+
+    /*
+     * Contains AIR map and mask
+     */
+    ISVCE_MEM_REC_AIR_MAP,
+
+    /**
+     * Contains status map indicating ME status per MB basis
+     */
+    ISVCE_MEM_REC_ME_MAP,
+
+    /**
+     * Holds dpb manager context
+     */
+    ISVCE_MEM_REC_DPB_MGR,
+
+    /**
+     * Holds intermediate buffers needed during processing stage
+     * Memory for process contexts is allocated in this memtab
+     */
+    ISVCE_MEM_REC_PROC_SCRATCH,
+
+    /**
+     * Holds buffers for vert_bs, horz_bs and QP (all frame level)
+     */
+    ISVCE_MEM_REC_QUANT_PARAM,
+
+    /**
+     * Holds top row syntax information
+     */
+    ISVCE_MEM_REC_TOP_ROW_SYN_INFO,
+
+    /**
+     * Holds buffers for vert_bs, horz_bs and QP (all frame level)
+     */
+    ISVCE_MEM_REC_BS_QP,
+
+    /**
+     * Holds input buffer manager context
+     */
+    ISVCE_MEM_REC_INP_PIC,
+
+    /**
+     * Holds output buffer manager context
+     */
+    ISVCE_MEM_REC_OUT,
+
+    /**
+     * Holds picture buffer manager context and array of pic_buf_ts
+     * Also holds reference picture buffers in non-shared mode
+     */
+    ISVCE_MEM_REC_REF_PIC,
+
+    /*
+     * Mem record for color space conversion
+     */
+    ISVCE_MEM_REC_CSC,
+
+    /**
+     * NMB info struct
+     */
+    ISVCE_MEM_REC_MB_INFO_NMB,
+
+    /**
+     * SVC Spatial layer Inputs
+     */
+    ISVCE_MEM_SVC_SPAT_INP,
+
+    /**
+     * Downscaler memory records
+     */
+    ISVCE_MEM_DOWN_SCALER,
+
+    /**
+     * SVC ILP data
+     */
+    ISVCE_MEM_SVC_ILP_DATA,
+
+    /**
+     * SVC ILP MV Context
+     */
+    ISVCE_MEM_SVC_ILP_MV_CTXT,
+
+    /**
+     * SVC ResPred Context
+     */
+    ISVCE_MEM_SVC_RES_PRED_CTXT,
+
+    /**
+     * SVC inter-layer intra pred context
+     */
+    ISVCE_MEM_SVC_INTRA_PRED_CTXT,
+
+    /**
+     * RC Utils Context
+     */
+    ISVCE_MEM_SVC_RC_UTILS_CTXT,
+
+    /**
+     * SubPic RC Context
+     */
+    ISVCE_MEM_SVC_SUB_PIC_RC_CTXT,
+
+#if ENABLE_MODE_STAT_VISUALISER
+    ISVCE_MEM_MODE_STAT_VISUALISER_BUF,
+#endif
+
+    /**
+     * First of the rate control memory records; NUM_SVCE_RC_MEMTABS records
+     * are counted from this index (see ISVCE_MEM_REC_CNT).
+     */
+    ISVCE_MEM_REC_RC,
+
+    /**
+     * Place holder to compute number of memory records.
+     */
+    ISVCE_MEM_REC_CNT = ISVCE_MEM_REC_RC + NUM_SVCE_RC_MEMTABS,
+
+    /*
+     * Do not add anything below
+     */
+} ISVCE_MEMREC_TYPES_T;
+
+#endif
--- a/encoder/svc/isvce_downscaler.c
+++ b/encoder/svc/isvce_downscaler.c
@ -0,0 +1,537 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_downscaler.c
+*
+* @brief
+*  Contains downscaler functions required by the SVC encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_get_downscaler_data_size()
+*  - isvce_get_downscaler_padding_dims()
+*  - isvce_get_downscaler_normalized_filtered_pixel()
+*  - isvce_horizontal_downscale_and_transpose()
+*  - isvce_process_downscaler()
+*  - isvce_initialize_downscaler()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* system include files */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "isvc_macros.h"
+#include "ih264_platform_macros.h"
+#include "iv2.h"
+#include "isvc_defs.h"
+#include "isvce_defs.h"
+#include "isvc_structs.h"
+#include "isvc_structs.h"
+#include "isvce_downscaler.h"
+#include "isvce_downscaler_private_defs.h"
+
+/**
+******************************************************************************
+* @brief  lanczos filter coefficients for 2x downscaling
+* @remarks Though the length of the filter is 8, the same coefficients
+* are replicated so that 2 rows can be processed at one go in SIMD.
+* Each row of the table is one filter phase: entries [0..7] hold the taps
+* and entries [8..15] repeat them.
+******************************************************************************
+*/
+static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
+    {-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
+    {-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
+    {-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
+    {-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
+    {-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
+    {-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
+    {-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
+    {0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};
+
+/**
+******************************************************************************
+* @brief  lanczos filter coefficients for 1.5x downscaling
+* @remarks Though the length of the filter is 8, the same coefficients
+* are replicated so that 2 rows can be processed at one go in SIMD.
+* Each row of the table is one filter phase: entries [0..7] hold the taps
+* and entries [8..15] repeat them.
+******************************************************************************
+*/
+static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
+    {{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
+     {0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
+     {0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
+     {0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
+     {0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
+     {0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
+     {0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
+     {0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};
+
+/**
+*******************************************************************************
+*
+* @brief
+*   computes the number of bytes of memory the downscaler asks for
+*
+* @par Description:
+*   With more than one spatial layer, the size is that of downscaler_state_t
+*   plus a block of (u4_height + 2 * NUM_SCALER_FILTER_TAPS) times
+*   (u4_width / d_scaling_factor, truncated) bytes. With a single spatial
+*   layer nothing is requested.
+*
+* @param[in] u1_num_spatial_layers
+*  number of spatial layers being encoded
+*
+* @param[in] d_scaling_factor
+*  ratio by which the width is divided
+*
+* @param[in] u4_width
+*  width used for sizing
+*
+* @param[in] u4_height
+*  height used for sizing
+*
+* @returns
+*  size in bytes; 0 when u1_num_spatial_layers is not greater than 1
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+
+UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
+                                       UWORD32 u4_width, UWORD32 u4_height)
+{
+    UWORD32 u4_padded_ht;
+    UWORD32 u4_scaled_wd;
+
+    /* nothing to downscale with a single spatial layer */
+    if(u1_num_spatial_layers <= 1)
+    {
+        return 0;
+    }
+
+    u4_padded_ht = u4_height + NUM_SCALER_FILTER_TAPS * 2;
+    u4_scaled_wd = (UWORD32) (u4_width / d_scaling_factor);
+
+    return ((UWORD32) sizeof(downscaler_state_t)) + (u4_padded_ht * u4_scaled_wd);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   reports the padding the downscaler filter needs around its input
+*
+* @par Description:
+*   Left and right padding are ALIGN8(NUM_SCALER_FILTER_TAPS / 2); top and
+*   bottom padding are NUM_SCALER_FILTER_TAPS / 2.
+*
+* @param[out] ps_pad_dims
+*  pointer to the padding dims struct that is filled in
+*
+* @returns none
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+
+void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
+{
+    /* horizontal padding is rounded up by ALIGN8, vertical padding is not */
+    ps_pad_dims->u1_right_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
+    ps_pad_dims->u1_left_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
+
+    ps_pad_dims->u1_bottom_pad_size = NUM_SCALER_FILTER_TAPS / 2;
+    ps_pad_dims->u1_top_pad_size = NUM_SCALER_FILTER_TAPS / 2;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   processes downscaler
+*
+* @par Description:
+*   Downscales one block of luma and then one block of chroma. Each plane is
+*   run through ps_scaler_state->pf_downscaler twice: first from the source
+*   buffer into the scratch buffer, then from the scratch buffer into the
+*   destination buffer.
+*
+* @param[in] ps_scaler
+*  pointer to downscaler context
+*
+* @param[in] ps_src_buf_props
+*  pointer to source buffer props struct; asserted to be IV_YUV_420SP_UV
+*
+* @param[out] ps_dst_buf_props
+*  pointer to destination buffer props struct; its Y and U component buffers
+*  receive the scaled output
+*
+* @param[in] u4_blk_wd
+*  width of the block to be processed
+*
+* @param[in] u4_blk_ht
+*  height of the block to be processed
+*
+* @returns none
+*
+* @remarks
+*  NOTE(review): pf_downscaler presumably writes its output transposed, which
+*  would be why the second pass over the scratch buffer performs the vertical
+*  scaling - confirm against the function it points to.
+*
+*******************************************************************************
+*/
+
+void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
+                              yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
+                              UWORD32 u4_blk_ht)
+{
+    buffer_container_t s_src_buf;
+    buffer_container_t s_dst_buf;
+
+    UWORD32 u4_scaled_block_size_x, u4_scaled_block_size_y;
+
+    downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
+
+    ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);
+
+    /* output block dimensions, truncated to integers */
+    u4_scaled_block_size_x = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
+    u4_scaled_block_size_y = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);
+
+    /* luma */
+    /* rewind the source by half the filter length, both in x and in rows */
+    s_src_buf = ps_src_buf_props->as_component_bufs[Y];
+    s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) -
+                        (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
+
+    /* first pass writes into the scratch buffer */
+    s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
+    s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
+
+    ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
+                                   u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 0);
+
+    /* second pass reads the scratch buffer and writes the destination luma */
+    s_src_buf = s_dst_buf;
+    s_dst_buf = ps_dst_buf_props->as_component_bufs[Y];
+
+    ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
+                                   u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
+
+    /* chroma */
+    /* heights are halved for chroma; widths are left unchanged */
+    u4_blk_ht /= 2;
+    u4_scaled_block_size_y /= 2;
+
+    /* rewind by a full filter length in x (twice the luma rewind) */
+    /* NOTE(review): presumably because U and V samples are interleaved */
+    s_src_buf = ps_src_buf_props->as_component_bufs[U];
+    s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - NUM_SCALER_FILTER_TAPS -
+                        (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
+
+    s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
+    s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
+
+    /* only this first chroma pass sets the chroma flag (last argument) */
+    ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
+                                   u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 1);
+
+    s_src_buf = s_dst_buf;
+    s_dst_buf = ps_dst_buf_props->as_component_bufs[U];
+
+    ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
+                                   u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   normalized dot product computer for downscaler
+*
+* @par Description:
+*   Accumulates the products of NUM_SCALER_FILTER_TAPS source pixels with the
+*   matching filter coefficients, adds (1 << (FILTER_COEFF_Q - 1)), divides by
+*   (1 << FILTER_COEFF_Q) and clips the result with CLIP_U8.
+*
+* @param[in] pu1_src
+*  pointer to the first of the NUM_SCALER_FILTER_TAPS source pixels
+*
+* @param[in] pi1_filter
+*  pointer to filter coefficients
+*
+* @returns
+*  the filtered, normalized and clipped pixel
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
+{
+    WORD32 i4_tap;
+    WORD32 i4_acc = 0;
+
+    for(i4_tap = 0; i4_tap < NUM_SCALER_FILTER_TAPS; i4_tap++)
+    {
+        i4_acc += (pu1_src[i4_tap] * pi1_filter[i4_tap]);
+    }
+
+    /* add half of the divisor before the integer division */
+    i4_acc = (i4_acc + (1 << (FILTER_COEFF_Q - 1))) / (1 << FILTER_COEFF_Q);
+
+    return (UWORD8) CLIP_U8(i4_acc);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   horizontal scaler function
+*
+* @par Description:
+*   Horizontally filters every row of the source block and stores the
+*   filtered pixels transposed, i.e. consecutive output pixels of one source
+*   row are written one destination stride apart
+*
+* @param[in] ps_scaler
+*  pointer to downscaler context
+*
+* @param[in] ps_src
+*  pointer to source buffer container
+*
+* @param[in] ps_dst
+*  pointer to destination buffer container
+*
+* @param[in] pai1_filters
+*  pointer to array of downscaler filters, indexed by filter phase
+*
+* @param[in] u4_blk_wd
+*  width of the block after horizontal scaling (output block width)
+*
+* @param[in] u4_blk_ht
+*  height of the current block (input block height)
+*
+* @param[in] u1_is_chroma
+*  flag suggesting whether the buffer is luma or chroma. For chroma the
+*  source is read as interleaved U/V samples, and the filtered U and V
+*  pixels are written one destination stride apart
+*
+* @returns
+*
+* @remarks
+*  The same function is used for vertical scaling too as
+*  the horizontally scaled input is stored in transpose fashion.
+*
+*******************************************************************************
+*/
+
+static void isvce_horizontal_downscale_and_transpose(
+    downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
+    FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
+{
+    WORD32 i4_row, i4_col, i4_tap;
+    UWORD8 u1_phase;
+    UWORD16 u2_int_pos;
+    UWORD32 u4_pos;
+    UWORD8 *pu1_src_row, *pu1_dst_col;
+    UWORD8 *pu1_in, *pu1_out;
+    WORD8 *pi1_coeffs;
+    UWORD8 au1_u_taps[NUM_SCALER_FILTER_TAPS];
+    UWORD8 au1_v_taps[NUM_SCALER_FILTER_TAPS];
+
+    downscaler_state_t *ps_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
+
+    UWORD32 u4_start_pos = ps_state->i4_init_offset;
+    UWORD32 u4_pos_step = ps_state->u4_horz_increment;
+    UWORD32 u4_src_stride = ps_src->i4_data_stride;
+    UWORD32 u4_dst_stride = ps_dst->i4_data_stride;
+    UWORD8 *pu1_dst_base = ps_dst->pv_data;
+
+    /* Offset the input so that the input pixel to be processed
+    co-incides with the centre of filter (4th coefficient)*/
+    UWORD8 *pu1_src_base = ((UWORD8 *) ps_src->pv_data) + (1 + u1_is_chroma);
+
+    ASSERT((1 << DOWNSCALER_Q) == ps_state->u4_vert_increment);
+
+    if(u1_is_chroma)
+    {
+        for(i4_row = 0; i4_row < (WORD32) u4_blk_ht; i4_row++)
+        {
+            pu1_src_row = pu1_src_base + (i4_row * u4_src_stride);
+            pu1_dst_col = pu1_dst_base + i4_row;
+
+            /* Every row starts from the same fixed-point position */
+            for(i4_col = 0, u4_pos = u4_start_pos; i4_col < (WORD32) u4_blk_wd;
+                i4_col++, u4_pos += u4_pos_step)
+            {
+                u1_phase = get_filter_phase(u4_pos);
+                pi1_coeffs = pai1_filters[u1_phase];
+
+                /* Integer part of the position selects the first input sample */
+                u2_int_pos = u4_pos >> DOWNSCALER_Q;
+                pu1_in = pu1_src_row + (u2_int_pos << u1_is_chroma);
+                pu1_out = pu1_dst_col + ((i4_col << u1_is_chroma) * u4_dst_stride);
+
+                /* De-interleave the U and V samples covered by the filter */
+                for(i4_tap = 0; i4_tap < NUM_SCALER_FILTER_TAPS; i4_tap++)
+                {
+                    au1_u_taps[i4_tap] = pu1_in[2 * i4_tap];
+                    au1_v_taps[i4_tap] = pu1_in[(2 * i4_tap) + 1];
+                }
+
+                pu1_out[0] =
+                    isvce_get_downscaler_normalized_filtered_pixel(au1_u_taps, pi1_coeffs);
+                pu1_out[u4_dst_stride] =
+                    isvce_get_downscaler_normalized_filtered_pixel(au1_v_taps, pi1_coeffs);
+            }
+        }
+    }
+    else
+    {
+        for(i4_row = 0; i4_row < (WORD32) u4_blk_ht; i4_row++)
+        {
+            pu1_src_row = pu1_src_base + (i4_row * u4_src_stride);
+            pu1_dst_col = pu1_dst_base + i4_row;
+
+            /* Every row starts from the same fixed-point position */
+            for(i4_col = 0, u4_pos = u4_start_pos; i4_col < (WORD32) u4_blk_wd;
+                i4_col++, u4_pos += u4_pos_step)
+            {
+                u1_phase = get_filter_phase(u4_pos);
+                pi1_coeffs = pai1_filters[u1_phase];
+
+                /* Integer part of the position selects the first input sample */
+                u2_int_pos = u4_pos >> DOWNSCALER_Q;
+                pu1_in = pu1_src_row + u2_int_pos;
+                pu1_out = pu1_dst_col + (i4_col * u4_dst_stride);
+
+                *pu1_out = isvce_get_downscaler_normalized_filtered_pixel(pu1_in, pi1_coeffs);
+            }
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   selects the downscaler leaf level function
+*
+* @par Description:
+*   Stores in the downscaler state the horizontal downscale-and-transpose
+*   function matching the given architecture. The C implementation is used
+*   for every architecture without a dedicated case in this build.
+*
+* @param[in] ps_scaler_state
+*   pointer to downscaler state; pf_downscaler is written
+*
+* @param[in] e_arch
+*   architecture type
+*
+* @returns
+*
+*******************************************************************************
+*/
+void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
+{
+    /* C implementation unless an architecture specific one is picked below */
+    FT_DOWNSCALER *pf_selected = isvce_horizontal_downscale_and_transpose;
+
+    switch(e_arch)
+    {
+#if defined(X86)
+        case ARCH_X86_SSE42:
+        {
+            pf_selected = isvce_horizontal_downscale_and_transpose_sse42;
+
+            break;
+        }
+#elif defined(ARMV8)
+        case ARCH_ARM_A53:
+        case ARCH_ARM_A57:
+        case ARCH_ARM_V8_NEON:
+        {
+            pf_selected = isvce_horizontal_downscale_and_transpose_neon;
+
+            break;
+        }
+#elif !defined(DISABLE_NEON)
+        case ARCH_ARM_A9Q:
+        case ARCH_ARM_A9A:
+        case ARCH_ARM_A9:
+        case ARCH_ARM_A7:
+        case ARCH_ARM_A5:
+        case ARCH_ARM_A15:
+        {
+            pf_selected = isvce_horizontal_downscale_and_transpose_neon;
+
+            break;
+        }
+#endif
+        default:
+        {
+            break;
+        }
+    }
+
+    ps_scaler_state->pf_downscaler = pf_selected;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   initializes the downscaler context
+*
+* @par Description:
+*   Places the downscaler state at the start of the supplied memory record,
+*   uses the memory right after it as scratch buffer, and fills in the input
+*   dimensions, leaf level function, position increments and filter table.
+*   Nothing is written when there is only one spatial layer.
+*
+* @param[in] ps_scaler
+*   pointer to downscaler context
+*
+* @param[in] ps_mem_rec
+*   pointer to memory allocated to downscaler process
+*
+* @param[in] d_scaling_factor
+*   scaling ratio of width/ height between two consecutive SVC layers.
+*   The 2x filter table is used when this is exactly 2.0, the 3by2x table
+*   otherwise
+*
+* @param[in] u1_num_spatial_layers
+*   number of spatial layers; the context is initialized only when this is
+*   greater than 1
+*
+* @param[in] u4_in_width
+*   width of the input
+*
+* @param[in] u4_in_height
+*   height of the input
+*
+* @param[in] e_arch
+*   architecture type
+*
+* @returns
+*
+* @remarks
+*  when ARM intrinsics are added, update should be done here
+*
+*******************************************************************************
+*/
+
+void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
+                                 DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
+                                 UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
+{
+    downscaler_state_t *ps_state;
+
+    /* A single spatial layer needs no downscaler */
+    if(u1_num_spatial_layers <= 1)
+    {
+        return;
+    }
+
+    /* State struct sits at the base of the record, scratch memory follows it */
+    ps_state = (downscaler_state_t *) ps_mem_rec->pv_base;
+    ps_state->pv_scratch_buf = ((UWORD8 *) ps_mem_rec->pv_base) + sizeof(ps_state[0]);
+
+    ps_state->u4_in_wd = u4_in_width;
+    ps_state->u4_in_ht = u4_in_height;
+
+    ps_scaler->pv_scaler_state = ps_state;
+    ps_scaler->d_scaling_factor = d_scaling_factor;
+    ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;
+
+    isvce_downscaler_function_selector(ps_state, e_arch);
+
+    /* Increments are fixed-point values with DOWNSCALER_Q fractional bits */
+    ps_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
+    ps_state->u4_vert_increment = (1 << DOWNSCALER_Q);
+    ps_state->i4_init_offset = 0;
+
+    if(d_scaling_factor == 2.0)
+    {
+        ps_state->pai1_filters = gai1_lanczos_coefficients_2x;
+    }
+    else
+    {
+        ps_state->pai1_filters = gai1_lanczos_coefficients_3by2x;
+    }
+}
--- a/encoder/svc/isvce_downscaler.h
+++ b/encoder/svc/isvce_downscaler.h
@ -0,0 +1,205 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_downscaler.h
+*
+* @brief
+*  Contains downscaler functions required by the SVC encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_get_downscaler_data_size()
+*  - isvce_get_downscaler_padding_dims()
+*  - isvce_isvce_process_ctxt_t_downscaler()
+*  - isvce_get_downscaler_normalized_filtered_pixel()
+*  - isvce_horizontal_downscale_and_transpose()
+*  - isvce_process_downscaler()
+*  - isvce_initialize_downscaler()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_DOWNSCALER_H_
+#define _ISVCE_DOWNSCALER_H_
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_defs.h"
+
+/**
+ * Downscaler context passed to the downscaler API functions
+ */
+typedef struct
+{
+    /**
+     * pointer to the state of downscaler
+     * (isvce_initialize_downscaler() stores a downscaler_state_t pointer here)
+     */
+    void *pv_scaler_state;
+
+    /**
+     * scaling factor between the dimensions of two consecutive SVC layers
+     */
+    DOUBLE d_scaling_factor;
+
+    /**
+     * Num spatial layers
+     */
+    UWORD8 u1_num_spatial_layers;
+
+} downscaler_ctxt_t;
+
+/**
+ * Padding sizes on each side of a buffer, as reported through
+ * isvce_get_downscaler_padding_dims()
+ * NOTE(review): units are presumably pixels - confirm against the definition
+ */
+typedef struct
+{
+    /* padding on the left edge */
+    UWORD8 u1_left_pad_size;
+
+    /* padding on the right edge */
+    UWORD8 u1_right_pad_size;
+
+    /* padding on the top edge */
+    UWORD8 u1_top_pad_size;
+
+    /* padding on the bottom edge */
+    UWORD8 u1_bottom_pad_size;
+
+} padding_dims_t;
+
+/**
+*******************************************************************************
+*
+* @brief
+*   initializes the downscaler context
+*
+* @par Description:
+*   initializes the downscaler context for the given scaling factor
+*   with padding size, filter size, etc.
+*
+* @param[in] ps_scaler
+*   pointer to downscaler context
+*
+* @param[in] ps_mem_rec
+*   pointer to memory allocated to downscaler process
+*
+* @param[in] d_scaling_factor
+*   scaling ratio of width/ height between two consecutive SVC layers
+*
+* @param[in] u1_num_spatial_layers
+*   number of spatial layers
+*
+* @param[in] u4_in_width
+*   width of the input
+*
+* @param[in] u4_in_height
+*   height of the input
+*
+* @param[in] e_arch
+*   architecture type
+*
+* @returns
+*
+* @remarks
+*  when ARM intrinsics are added, update should be done here
+*
+*******************************************************************************
+*/
+
+extern void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
+                                        DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
+                                        UWORD32 u4_in_width, UWORD32 u4_in_height,
+                                        IV_ARCH_T e_arch);
+
+/**
+*******************************************************************************
+*
+* @brief
+*   gets the memory size required for downscaler
+*
+* @par Description:
+*   returns the memory required by the downscaler context and state structs
+*   for allocation.
+*
+* @param[in] u1_num_spatial_layers
+*   number of spatial layers
+*
+* @param[in] d_scaling_factor
+*   scaling ratio of width/ height between two consecutive SVC layers
+*
+* @param[in] u4_width
+*   width (NOTE(review): presumably of the input picture - confirm)
+*
+* @param[in] u4_height
+*   height (NOTE(review): presumably of the input picture - confirm)
+*
+* @returns
+*   required memory size (NOTE(review): presumably in bytes - confirm)
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+
+extern UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
+                                              UWORD32 u4_width, UWORD32 u4_height);
+
+/**
+*******************************************************************************
+*
+* @brief
+*   processes downscaler
+*
+* @par Description:
+*   calls the function for padding and scaling
+*
+* @param[in] ps_scaler
+*  pointer to downscaler context
+*
+* @param[in] ps_src_buf_props
+*  pointer to source buffer props struct
+*
+* @param[in] ps_dst_buf_props
+*  pointer to destination buffer props struct
+*
+* @param[in] u4_blk_wd
+*  width of the block to be processed
+*
+* @param[in] u4_blk_ht
+*  height of the block to be processed
+*
+* @returns
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+
+extern void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler,
+                                     yuv_buf_props_t *ps_src_buf_props,
+                                     yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
+                                     UWORD32 u4_blk_ht);
+
+/**
+*******************************************************************************
+*
+* @brief
+*   gets the padding size required for filtering
+*
+* @par Description:
+*   gets the padding size required for filtering
+*
+* @param ps_pad_dims
+*   pointer to padding dims struct
+*   (NOTE(review): presumably filled in by this function - confirm)
+*
+* @returns
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+
+extern void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims);
+
+#endif
--- a/encoder/svc/isvce_downscaler_private_defs.h
+++ b/encoder/svc/isvce_downscaler_private_defs.h
@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#ifndef _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_
+#define _ISVCE_DOWNSCALER_PRIVATE_DEFS_H_
+#include "ih264_typedefs.h"
+#include "isvc_macros.h"
+#include "ih264_debug.h"
+#include "isvc_structs.h"
+#include "isvce_downscaler.h"
+
+/* Macros */
+/* Number of fractional bits in the fixed-point source pixel position */
+#define DOWNSCALER_Q 16
+
+/* Number of fractional bits in the filter coefficients; */
+/* filtered sums are normalized by (1 << FILTER_COEFF_Q) */
+#define FILTER_COEFF_Q 7
+
+/* Number of filter taps applied for every output pixel */
+#define NUM_SCALER_FILTER_TAPS 8
+
+/* Number of phases the fractional pixel position is binned into */
+#define NUM_SCALER_FILTER_PHASES 8
+
+/* Typedefs */
+/* Pointer to rows of (NUM_SCALER_FILTER_TAPS * 2) coefficients; */
+/* rows are indexed by filter phase */
+typedef WORD8 (*FILTER_COEFF_ARRAY)[NUM_SCALER_FILTER_TAPS * 2];
+
+/* Signature of the leaf level horizontal downscale-and-transpose functions */
+typedef void FT_DOWNSCALER(downscaler_ctxt_t *ps_scaler_state, buffer_container_t *ps_src,
+                           buffer_container_t *ps_dst, FILTER_COEFF_ARRAY pai1_filters,
+                           UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma);
+
+/* Structs */
+/* Private state of the downscaler, reached through downscaler_ctxt_t::pv_scaler_state */
+typedef struct
+{
+    /**
+     * pointer to scratch buf
+     */
+    void *pv_scratch_buf;
+
+    /**
+     * initial offset while calculating input pixel location
+     * (fixed-point, DOWNSCALER_Q fractional bits)
+     */
+    WORD32 i4_init_offset;
+
+    /**
+     * increment to the centre pixel in horizontal direction
+     * (fixed-point, DOWNSCALER_Q fractional bits)
+     */
+    UWORD32 u4_horz_increment;
+
+    /**
+     * increment to the centre pixel in vertical direction
+     * (fixed-point, DOWNSCALER_Q fractional bits)
+     */
+    UWORD32 u4_vert_increment;
+
+    /**
+     * pointer to the filter coefficients
+     */
+    FILTER_COEFF_ARRAY pai1_filters;
+
+    /**
+     * function pointer to the leaf level function for horizontal scaling
+     */
+    FT_DOWNSCALER *pf_downscaler;
+
+    /**
+     * width of the input (highest SVC layer)
+     */
+    UWORD32 u4_in_wd;
+
+    /**
+     * height of the input (highest SVC layer)
+     */
+    UWORD32 u4_in_ht;
+
+} downscaler_state_t;
+
+/**
+*******************************************************************************
+*
+* @brief
+*   gets the filter phase for a fixed-point pixel position
+*
+* @par Description:
+*   The fractional part of the position (its lower DOWNSCALER_Q bits) is
+*   binned into NUM_SCALER_FILTER_PHASES equally sized intervals; the index
+*   of the interval containing it is the phase.
+*
+* @param[in] u4_center_pixel_pos
+*   pixel position with DOWNSCALER_Q fractional bits
+*
+* @returns
+*   phase index in the range [0, NUM_SCALER_FILTER_PHASES - 1]
+*
+* @remarks
+*   The closed form below relies on NUM_SCALER_FILTER_PHASES dividing
+*   (1 << DOWNSCALER_Q) exactly, which holds for the asserted value of 8.
+*
+*******************************************************************************
+*/
+static FORCEINLINE UWORD32 get_filter_phase(UWORD32 u4_center_pixel_pos)
+{
+    /* Width of one phase interval, in fixed-point position units */
+    const UWORD32 u4_phase_bin_size = (1 << DOWNSCALER_Q) / NUM_SCALER_FILTER_PHASES;
+
+    /* Fractional part of the pixel position */
+    const UWORD32 u4_frac_pos = u4_center_pixel_pos % (1 << DOWNSCALER_Q);
+
+    ASSERT(NUM_SCALER_FILTER_PHASES == 8);
+
+    /* Computed directly: this runs once per output pixel, so no per-call */
+    /* table of interval boundaries is built or searched */
+    return u4_frac_pos / u4_phase_bin_size;
+}
+
+/* SSE42 Declarations */
+/* SSE4.2 leaf level function; shares the FT_DOWNSCALER signature */
+extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_sse42;
+
+/* NEON Declarations */
+/* NEON leaf level function; shares the FT_DOWNSCALER signature */
+extern FT_DOWNSCALER isvce_horizontal_downscale_and_transpose_neon;
+
+#endif
--- a/encoder/svc/isvce_encode.c
+++ b/encoder/svc/isvce_encode.c
@ -0,0 +1,790 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_encode.c
+*
+* @brief
+*  This file contains functions for encoding the input yuv frame in synchronous
+*  api mode
+*
+* @author
+*  ittiam
+*
+* List of Functions
+*  - isvce_join_threads()
+*  - isvce_wait_for_thread()
+*  - isvce_encode()
+*
+******************************************************************************
+*/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+
+#include "ih264_typedefs.h"
+/* Dependencies of ih264_buf_mgr.h */
+/* Dependencies of ih264_list.h */
+#include "ih264_error.h"
+/* Dependencies of ih264_common_tables.h */
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+/* Dependencies of ih264e_cabac_structs.h */
+#include "ih264_cabac_tables.h"
+/* Dependencies of ime_structs.h */
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+/* Dependencies of ih264e_structs.h */
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+/* Dependencies of ih264e_bitstream.h */
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264e_cabac_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ime_statistics.h"
+#include "ime_structs.h"
+/* Dependencies of 'ih264e_utils.h' */
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ime.h"
+#include "isvce.h"
+#include "isvce_cabac.h"
+#include "isvce_deblk.h"
+#include "isvce_defs.h"
+#include "isvce_downscaler.h"
+#include "isvce_encode_header.h"
+#include "isvce_fmt_conv.h"
+#include "isvce_ibl_eval.h"
+#include "isvce_ilp_mv.h"
+#include "isvce_intra_modes_eval.h"
+#include "isvce_me.h"
+#include "isvce_process.h"
+#include "isvce_rate_control.h"
+#include "isvce_residual_pred.h"
+#include "isvce_sub_pic_rc.h"
+#include "isvce_utils.h"
+
+#define SEI_BASED_FORCE_IDR 1
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function yields the processor and then puts the current thread
+*  to sleep for a duration of sleep_us
+*
+* @par Description
+*  ithread_yield() causes the calling thread to yield execution to another
+*  thread that is ready to run on the current processor. The operating system
+*  selects the thread to yield to. In other words, yield just says, end my
+*  timeslice prematurely, look around for other threads to run. If there is
+*  nothing better than me, continue. ithread_sleep() then blocks the current
+*  thread for the requested duration. Sleep says I don't want to run for that
+*  long. Even if no other thread wants to run, don't make me run.
+*
+*  NOTE(review): the argument is named sleep_us but is passed unchanged to
+*  ithread_sleep(); confirm the time unit ithread_sleep() expects.
+*
+* @param[in] sleep_us
+*  thread sleep duration
+*
+* @returns error_status
+*  always IH264E_SUCCESS
+*
+******************************************************************************
+*/
+IH264E_ERROR_T isvce_wait_for_thread(UWORD32 sleep_us)
+{
+    /* yield thread */
+    ithread_yield();
+
+    /* put thread to sleep */
+    ithread_sleep(sleep_us);
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Encodes in synchronous api mode
+*
+* @par Description
+*  This routine processes input yuv, encodes it and outputs bitstream and recon
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns  Status
+*
+******************************************************************************
+*/
+WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* codec ctxt */
+    isvce_codec_t *ps_codec = (isvce_codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* input frame to encode */
+    isvce_video_encode_ip_t *ps_video_encode_ip = pv_api_ip;
+
+    /* output buffer to write stream */
+    isvce_video_encode_op_t *ps_video_encode_op = pv_api_op;
+
+    /* i/o structures */
+    isvce_inp_buf_t s_inp_buf;
+    isvce_out_buf_t s_out_buf;
+
+    WORD32 ctxt_sel = 0, i4_rc_pre_enc_skip;
+    WORD32 i, j;
+
+    ASSERT(MAX_CTXT_SETS == 1);
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+    /* reset output structure */
+    ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+    ps_video_encode_op->s_ive_op.output_present = 0;
+    ps_video_encode_op->s_ive_op.dump_recon = 0;
+    ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+
+    /* Check for output memory allocation size */
+    {
+        UWORD32 u4_min_bufsize =
+            MIN_STREAM_SIZE * ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
+        UWORD32 u4_bufsize_per_layer = ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize /
+                                       ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
+
+        if(ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize < u4_min_bufsize)
+        {
+            error_status = IH264E_INSUFFICIENT_OUTPUT_BUFFER;
+
+            SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM,
+                                ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+        }
+
+        for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
+        {
+            s_out_buf.as_bits_buf[i] = ps_video_encode_ip->s_ive_ip.s_out_buf;
+
+            s_out_buf.as_bits_buf[i].u4_bufsize = u4_bufsize_per_layer;
+            s_out_buf.as_bits_buf[i].pv_buf =
+                ((UWORD8 *) ps_video_encode_ip->s_ive_ip.s_out_buf.pv_buf) +
+                u4_bufsize_per_layer * i;
+        }
+    }
+
+    s_out_buf.u4_is_last = 0;
+    s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+    s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+
+    /* api call cnt */
+    ps_codec->i4_encode_api_call_cnt += 1;
+
+    /* codec context selector */
+    ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
+
+    /* reset status flags */
+    ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
+    ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0;
+    ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0;
+
+    /* pass output buffer to codec */
+    ps_codec->as_out_buf[ctxt_sel] = s_out_buf;
+
+    /* initialize codec ctxt with default params for the first encode api call */
+    if(ps_codec->i4_encode_api_call_cnt == 0)
+    {
+        isvce_codec_init(ps_codec);
+    }
+
+    /* parse configuration params */
+    for(i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+    {
+        isvce_cfg_params_t *ps_cfg = &ps_codec->as_cfg[i];
+
+        if(1 == ps_cfg->u4_is_valid)
+        {
+            if(((ps_cfg->u4_timestamp_high == ps_video_encode_ip->s_ive_ip.u4_timestamp_high) &&
+                (ps_cfg->u4_timestamp_low == ps_video_encode_ip->s_ive_ip.u4_timestamp_low)) ||
+               ((WORD32) ps_cfg->u4_timestamp_high == -1) ||
+               ((WORD32) ps_cfg->u4_timestamp_low == -1))
+            {
+                error_status = isvce_codec_update_config(ps_codec, ps_cfg);
+                SET_ERROR_ON_RETURN(error_status, IVE_UNSUPPORTEDPARAM,
+                                    ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+
+                ps_cfg->u4_is_valid = 0;
+            }
+        }
+    }
+    /* Force IDR based on SEI params */
+#if SEI_BASED_FORCE_IDR
+    {
+        sei_mdcv_params_t *ps_sei_mdcv_params = &ps_codec->s_sei.s_sei_mdcv_params;
+        sei_mdcv_params_t *ps_cfg_sei_mdcv_params = &ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
+        sei_cll_params_t *ps_sei_cll_params = &ps_codec->s_sei.s_sei_cll_params;
+        sei_cll_params_t *ps_cfg_sei_cll_params = &ps_codec->s_cfg.s_sei.s_sei_cll_params;
+        sei_ave_params_t *ps_sei_ave_params = &ps_codec->s_sei.s_sei_ave_params;
+        sei_ave_params_t *ps_cfg_sei_ave_params = &ps_codec->s_cfg.s_sei.s_sei_ave_params;
+
+        if((ps_sei_mdcv_params->au2_display_primaries_x[0] !=
+            ps_cfg_sei_mdcv_params->au2_display_primaries_x[0]) ||
+           (ps_sei_mdcv_params->au2_display_primaries_x[1] !=
+            ps_cfg_sei_mdcv_params->au2_display_primaries_x[1]) ||
+           (ps_sei_mdcv_params->au2_display_primaries_x[2] !=
+            ps_cfg_sei_mdcv_params->au2_display_primaries_x[2]) ||
+           (ps_sei_mdcv_params->au2_display_primaries_y[0] !=
+            ps_cfg_sei_mdcv_params->au2_display_primaries_y[0]) ||
+           (ps_sei_mdcv_params->au2_display_primaries_y[1] !=
+            ps_cfg_sei_mdcv_params->au2_display_primaries_y[1]) ||
+           (ps_sei_mdcv_params->au2_display_primaries_y[2] !=
+            ps_cfg_sei_mdcv_params->au2_display_primaries_y[2]) ||
+           (ps_sei_mdcv_params->u2_white_point_x != ps_cfg_sei_mdcv_params->u2_white_point_x) ||
+           (ps_sei_mdcv_params->u2_white_point_y != ps_cfg_sei_mdcv_params->u2_white_point_y) ||
+           (ps_sei_mdcv_params->u4_max_display_mastering_luminance !=
+            ps_cfg_sei_mdcv_params->u4_max_display_mastering_luminance) ||
+           (ps_sei_mdcv_params->u4_min_display_mastering_luminance !=
+            ps_cfg_sei_mdcv_params->u4_min_display_mastering_luminance))
+        {
+            ps_codec->s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
+            ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 1;
+        }
+        else
+        {
+            ps_codec->s_sei.u1_sei_mdcv_params_present_flag = 0;
+        }
+
+        if((ps_sei_cll_params->u2_max_content_light_level !=
+            ps_cfg_sei_cll_params->u2_max_content_light_level) ||
+           (ps_sei_cll_params->u2_max_pic_average_light_level !=
+            ps_cfg_sei_cll_params->u2_max_pic_average_light_level))
+        {
+            ps_codec->s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
+            ps_codec->s_sei.u1_sei_cll_params_present_flag = 1;
+        }
+        else
+        {
+            ps_codec->s_sei.u1_sei_cll_params_present_flag = 0;
+        }
+
+        if((ps_sei_ave_params->u4_ambient_illuminance !=
+            ps_cfg_sei_ave_params->u4_ambient_illuminance) ||
+           (ps_sei_ave_params->u2_ambient_light_x != ps_cfg_sei_ave_params->u2_ambient_light_x) ||
+           (ps_sei_ave_params->u2_ambient_light_y != ps_cfg_sei_ave_params->u2_ambient_light_y))
+        {
+            ps_codec->s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
+            ps_codec->s_sei.u1_sei_ave_params_present_flag = 1;
+        }
+        else
+        {
+            ps_codec->s_sei.u1_sei_ave_params_present_flag = 0;
+        }
+
+        if((1 == ps_codec->s_sei.u1_sei_mdcv_params_present_flag) ||
+           (1 == ps_codec->s_sei.u1_sei_cll_params_present_flag) ||
+           (1 == ps_codec->s_sei.u1_sei_ave_params_present_flag))
+        {
+            ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+        }
+    }
+#endif
+
+    /* In case of alt ref and B pics we will have non reference frame in stream */
+    if(ps_codec->s_cfg.u4_enable_alt_ref || ps_codec->s_cfg.u4_num_bframes)
+    {
+        ps_codec->i4_non_ref_frames_in_stream = 1;
+    }
+
+    if(ps_codec->i4_encode_api_call_cnt == 0)
+    {
+        /********************************************************************/
+        /*   number of mv/ref bank buffers used by the codec,               */
+        /*      1 to handle curr frame                                      */
+        /*      1 to store information of ref frame                         */
+        /*      1 more additional because of the codec employs 2 ctxt sets  */
+        /*        to assist asynchronous API                                */
+        /********************************************************************/
+
+        /* initialize mv bank buffer manager */
+        error_status = isvce_svc_au_data_mgr_add_bufs(ps_codec);
+
+        SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+
+        /* initialize ref bank buffer manager */
+        error_status = isvce_svc_au_buf_mgr_add_bufs(ps_codec);
+
+        SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+
+        /* for the first frame, generate header when not requested explicitly */
+        if(ps_codec->i4_header_mode == 0 && ps_codec->u4_header_generated == 0)
+        {
+            ps_codec->i4_gen_header = 1;
+        }
+    }
+
+    /* generate header and return when encoder is operated in header mode */
+    if(ps_codec->i4_header_mode == 1)
+    {
+        /* whenever the header is generated, this implies a start of sequence
+         * and a sequence needs to be started with IDR
+         */
+        ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+        s_inp_buf.s_svc_params = ps_codec->s_cfg.s_svc_params;
+        s_inp_buf.s_inp_props.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
+        s_inp_buf.s_inp_props.s_raw_buf.au4_wd[Y] = ps_codec->s_cfg.u4_wd;
+        s_inp_buf.s_inp_props.s_raw_buf.au4_ht[Y] = ps_codec->s_cfg.u4_ht;
+
+        isvce_init_svc_dimension(&s_inp_buf);
+
+        /* generate header */
+        error_status = isvce_generate_sps_pps(ps_codec, &s_inp_buf);
+
+        /* send the input to app */
+        ps_video_encode_op->s_ive_op.s_inp_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
+        ps_video_encode_op->s_ive_op.u4_timestamp_low =
+            ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+        ps_video_encode_op->s_ive_op.u4_timestamp_high =
+            ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+
+        ps_video_encode_op->s_ive_op.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+
+        /* send the output to app */
+        ps_video_encode_op->s_ive_op.output_present = 1;
+        ps_video_encode_op->s_ive_op.dump_recon = 0;
+        ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0];
+
+        for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
+        {
+            memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf +
+                     ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes),
+                    ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf,
+                    ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes);
+
+            ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes +=
+                ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes;
+        }
+
+        /* error status */
+        SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+
+        /* indicates that header has been generated previously */
+        ps_codec->u4_header_generated = 1;
+
+        /* api call cnt */
+        ps_codec->i4_encode_api_call_cnt--;
+
+        /* header mode tag is not sticky */
+        ps_codec->i4_header_mode = 0;
+        ps_codec->i4_gen_header = 0;
+
+        return IV_SUCCESS;
+    }
+
+    /* curr pic cnt */
+    ps_codec->i4_pic_cnt += 1;
+
+    i4_rc_pre_enc_skip = 0;
+    for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
+    {
+        i4_rc_pre_enc_skip =
+            isvce_input_queue_update(ps_codec, &ps_video_encode_ip->s_ive_ip, &s_inp_buf, i);
+    }
+
+    s_out_buf.u4_is_last = s_inp_buf.s_inp_props.u4_is_last;
+    ps_video_encode_op->s_ive_op.u4_is_last = s_inp_buf.s_inp_props.u4_is_last;
+
+    /* Only encode if the current frame is not pre-encode skip */
+    if(!i4_rc_pre_enc_skip && s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
+    {
+        isvce_process_ctxt_t *ps_proc = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS];
+
+        WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
+
+        ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
+
+        error_status = isvce_svc_au_init(ps_codec, &s_inp_buf);
+
+        SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+
+        isvce_nalu_info_au_init(ps_codec->as_nalu_descriptors,
+                                ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers);
+
+#if ENABLE_MODE_STAT_VISUALISER
+        isvce_msv_get_input_frame(ps_codec->ps_mode_stat_visualiser, &s_inp_buf);
+#endif
+
+        for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
+        {
+            isvce_svc_layer_pic_init(ps_codec, &s_inp_buf, i);
+
+            for(j = 0; j < num_thread_cnt; j++)
+            {
+                ithread_create(ps_codec->apv_proc_thread_handle[j], NULL, isvce_process_thread,
+                               &ps_codec->as_process[j + 1]);
+
+                ps_codec->ai4_process_thread_created[j] = 1;
+
+                ps_codec->i4_proc_thread_cnt++;
+            }
+
+            /* launch job */
+            isvce_process_thread(ps_proc);
+
+            /* Join threads at the end of encoding a frame */
+            isvce_join_threads(ps_codec);
+
+            ih264_list_reset(ps_codec->pv_proc_jobq);
+
+            ih264_list_reset(ps_codec->pv_entropy_jobq);
+        }
+
+#if ENABLE_MODE_STAT_VISUALISER
+        isvce_msv_dump_visualisation(ps_codec->ps_mode_stat_visualiser);
+#endif
+
+        isvce_sub_pic_rc_dump_data(ps_codec->as_process->ps_sub_pic_rc_ctxt);
+    }
+
+    /****************************************************************************
+     * RECON
+     *    Since we have forward dependent frames, we cannot return recon in
+     *    encoding order. It must be in poc order, or input pic order. To achieve
+     *    this we introduce a delay of 1 to the recon wrt encode. Now since we have
+     *    that delay, at any point minimum of pic_cnt in our ref buffer will be the
+     *    correct frame. For ex let our GOP be IBBP [1 2 3 4]. The encode order
+     *    will be [1 4 2 3]. Now since we have a delay of 1, when we are done with
+     *    encoding 4, the min in the list will be 1. After encoding 2, it will be
+     *    2, 3 after 3 and 4 after 4. Hence we can return in sequence. Note
+     *    that the 1 delay is critical. Hence if we have post enc skip, we must
+     *    skip here too. Note that since post enc skip already frees the recon
+     *    buffer we need not do anything here.
+     *
+     *    We need to return a recon whenever we consume an input buffer. This
+     *    consumption includes a pre or post enc skip. Thus dump recon is set for
+     *    all cases except when
+     *    1) We are waiting, i.e. ps_codec->i4_pic_cnt <=
+     *    ps_codec->s_cfg.u4_num_bframes. An exception needs to be made for the case
+     *    when we have the last buffer since we need to flush out the remaining recon.
+     ****************************************************************************/
+
+    ps_video_encode_op->s_ive_op.dump_recon = 0;
+
+    if(ps_codec->s_cfg.u4_enable_recon &&
+       ((ps_codec->i4_pic_cnt > (WORD32) ps_codec->s_cfg.u4_num_bframes) ||
+        s_inp_buf.s_inp_props.u4_is_last))
+    {
+        /* error status */
+        IH264_ERROR_T ret = IH264_SUCCESS;
+
+        svc_au_buf_t *ps_pic_buf = NULL;
+
+        WORD32 i4_buf_status, i4_curr_poc = 32768;
+
+        /* In case of skips we return recon, but indicate that buffer is zero size
+         */
+        if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel] || i4_rc_pre_enc_skip)
+        {
+            ps_video_encode_op->s_ive_op.dump_recon = 1;
+            ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[0] = 0;
+            ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[1] = 0;
+        }
+        else
+        {
+            for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+            {
+                if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue;
+
+                i4_buf_status = ih264_buf_mgr_get_status(
+                    ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
+
+                if((i4_buf_status & BUF_MGR_IO) && (ps_codec->as_ref_set[i].i4_poc < i4_curr_poc))
+                {
+                    ps_pic_buf = ps_codec->as_ref_set[i].ps_pic_buf;
+                    i4_curr_poc = ps_codec->as_ref_set[i].i4_poc;
+                }
+            }
+
+            ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
+
+            /*
+             * If we get a valid buffer. output and free recon.
+             *
+             * we may get an invalid buffer if num_b_frames is 0. This is because
+             * We assume that there will be a ref frame in ref list after encoding
+             * the last frame. With B frames this is correct since its forward ref
+             * pic will be in the ref list. But if num_b_frames is 0, we will not
+             * have a forward ref pic
+             */
+
+            if(ps_pic_buf)
+            {
+                if((ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[Y] !=
+                    ps_codec->s_cfg.u4_disp_wd) ||
+                   (ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_ht[Y] !=
+                    ps_codec->s_cfg.u4_disp_ht))
+                {
+                    SET_ERROR_ON_RETURN(IH264E_NO_FREE_RECONBUF, IVE_FATALERROR,
+                                        ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+                }
+
+                isvce_fmt_conv(ps_codec, ps_pic_buf,
+                               ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
+                               ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
+                               ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
+                               ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
+                               ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], 0,
+                               ps_codec->s_cfg.u4_disp_ht);
+
+                ps_video_encode_op->s_ive_op.dump_recon = 1;
+
+                ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_pic_buf->i4_buf_id,
+                                            BUF_MGR_IO);
+
+                if(IH264_SUCCESS != ret)
+                {
+                    SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
+                                        ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+                }
+            }
+        }
+    }
+
+    /***************************************************************************
+     * Free reference buffers:
+     * In case of a post enc skip, we have to ensure that those pics will not
+     * be used as reference anymore. In all other cases we will not even mark
+     * the ref buffers
+     ***************************************************************************/
+    if(ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+    {
+        /* pic info */
+        svc_au_buf_t *ps_cur_pic;
+
+        /* mv info */
+        svc_au_data_t *ps_cur_mv_buf;
+
+        /* error status */
+        IH264_ERROR_T ret = IH264_SUCCESS;
+
+        /* Decrement coded pic count */
+        ps_codec->i4_poc--;
+
+        /* loop through to get the min pic cnt among the list of pics stored in ref
+         * list */
+        /* since the skipped frame may not be on reference list, we may not have an
+         * MV bank hence free only if we have allocated */
+        for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+        {
+            if(ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
+            {
+                ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+
+                ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_svc_au_data;
+
+                /* release this frame from reference list and recon list */
+                ret = ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr,
+                                            ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
+                ret |= ih264_buf_mgr_release(ps_codec->pv_svc_au_data_store_mgr,
+                                             ps_cur_mv_buf->i4_buf_id, BUF_MGR_IO);
+                SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
+                                    ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+
+                ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id,
+                                            BUF_MGR_REF);
+                ret |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id,
+                                             BUF_MGR_IO);
+                SET_ERROR_ON_RETURN((IH264E_ERROR_T) ret, IVE_FATALERROR,
+                                    ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+                break;
+            }
+        }
+    }
+
+    /*
+     * Since recon is not in sync with output, ie there can be frame to be
+     * given back as recon even after last output. Hence we need to mark that
+     * the output is not the last.
+     * Hence search through reflist and mark appropriately
+     */
+    if(ps_codec->s_cfg.u4_enable_recon)
+    {
+        WORD32 i4_buf_status = 0;
+
+        for(i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+        {
+            if(ps_codec->as_ref_set[i].i4_pic_cnt == -1) continue;
+
+            i4_buf_status |= ih264_buf_mgr_get_status(
+                ps_codec->pv_ref_buf_mgr, ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
+        }
+
+        if(i4_buf_status & BUF_MGR_IO)
+        {
+            s_out_buf.u4_is_last = 0;
+            ps_video_encode_op->s_ive_op.u4_is_last = 0;
+        }
+    }
+
+    /**************************************************************************
+     * Signaling to APP
+     *  1) If we have a valid output, mark it so
+     *  2) Set the codec output ps_video_encode_op
+     *  3) Set the error status
+     *  4) Set the return Pic type
+     *      Note that we have already marked recon properly
+     *  5) Send the consumed input back to app so that it can free it if possible
+     *
+     *  We will have to return the output and input buffers unconditionally
+     *  so that app can release them
+     **************************************************************************/
+    if(!i4_rc_pre_enc_skip && !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] &&
+       s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
+    {
+        /* receive output back from codec */
+        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+        /* send the output to app */
+        ps_video_encode_op->s_ive_op.output_present = 1;
+        ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+        /* Set the time stamps of the encodec input */
+        ps_video_encode_op->s_ive_op.u4_timestamp_low = s_inp_buf.s_inp_props.u4_timestamp_low;
+        ps_video_encode_op->s_ive_op.u4_timestamp_high = s_inp_buf.s_inp_props.u4_timestamp_high;
+
+        switch(ps_codec->pic_type)
+        {
+            case PIC_IDR:
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
+                break;
+
+            case PIC_I:
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
+                break;
+
+            case PIC_P:
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
+                break;
+
+            case PIC_B:
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_B_FRAME;
+                break;
+
+            default:
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+                break;
+        }
+
+        for(i = 0; i < (WORD32) ps_codec->s_cfg.u4_num_cores; i++)
+        {
+            error_status = ps_codec->as_process[ctxt_sel + i].i4_error_code;
+            SET_ERROR_ON_RETURN(error_status, IVE_FATALERROR,
+                                ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL);
+        }
+    }
+    else
+    {
+        /* receive output back from codec */
+        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+        ps_video_encode_op->s_ive_op.output_present = 0;
+        ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+        /* Set the time stamps of the encodec input */
+        ps_video_encode_op->s_ive_op.u4_timestamp_low = 0;
+        ps_video_encode_op->s_ive_op.u4_timestamp_high = 0;
+
+        ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf;
+
+        ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+    }
+
+    /* Hand back layer 0's bitstream buffer, then append the other layers' bitstreams to it */
+    ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].as_bits_buf[0];
+
+    for(i = 1; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
+    {
+        memmove(((UWORD8 *) ps_video_encode_op->s_ive_op.s_out_buf.pv_buf +
+                 ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes),
+                ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].pv_buf,
+                ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes);
+
+        ps_video_encode_op->s_ive_op.s_out_buf.u4_bytes +=
+            ps_codec->as_out_buf[ctxt_sel].as_bits_buf[i].u4_bytes;
+    }
+
+    if(ps_codec->s_cfg.b_nalu_info_export_enable && !i4_rc_pre_enc_skip &&
+       !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] &&
+       s_inp_buf.s_inp_props.s_raw_buf.apv_bufs[0])
+    {
+        ps_video_encode_op->b_is_nalu_info_present = true;
+
+        for(i = 0; i < ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; i++)
+        {
+            isvce_nalu_info_csv_translator(&ps_codec->as_nalu_descriptors[i],
+                                           &ps_video_encode_ip->ps_nalu_info_buf[i]);
+
+            ps_video_encode_op->ps_nalu_info_buf[i] = ps_video_encode_ip->ps_nalu_info_buf[i];
+        }
+    }
+    else
+    {
+        ps_video_encode_op->b_is_nalu_info_present = false;
+    }
+
+    ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_inp_props.s_raw_buf;
+
+    return IV_SUCCESS;
+}
--- a/encoder/svc/isvce_encode.h
+++ b/encoder/svc/isvce_encode.h
@ -0,0 +1,41 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_encode.h
+*
+* @brief
+*  Contains functions for encode API
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ENCODE_H_
+#define _ISVCE_ENCODE_H_
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+
+extern WORD32 isvce_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op);
+
+#endif
--- a/encoder/svc/isvce_encode_header.c
+++ b/encoder/svc/isvce_encode_header.c
--- a/encoder/svc/isvce_encode_header.h
+++ b/encoder/svc/isvce_encode_header.h
@ -0,0 +1,296 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_encode_header.h
+*
+* @brief
+*  This file contains structures and interface prototypes for h264 bitstream
+*  header encoding
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ENCODE_HEADER_H_
+#define _ISVCE_ENCODE_HEADER_H_
+
+#include "ih264_typedefs.h"
+
+/* Dependencies of ih264e_bitstream.h */
+#include "ih264e_error.h"
+
+#include "ih264e_bitstream.h"
+#include "ih264e_trace.h"
+#include "isvce_structs.h"
+
+/**
+******************************************************************************
+*  @brief   Macro to put a code with specified number of bits into the
+*           bitstream
+*
+*  Passes syntax_string and code_val to ENTROPY_TRACE, writes code_val with
+*  ih264e_put_bits() and stores the status in ret_val. If the status is not
+*  IH264E_SUCCESS, the macro executes 'return ret_val;' in the enclosing
+*  function, so the enclosing function must be able to return ret_val.
+******************************************************************************
+*/
+#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \
+    {                                                                    \
+        ENTROPY_TRACE(syntax_string, code_val);                          \
+        ret_val = ih264e_put_bits((ps_bitstrm), (code_val), (code_len)); \
+        if(ret_val != IH264E_SUCCESS)                                    \
+        {                                                                \
+            return ret_val;                                              \
+        }                                                                \
+    }
+
+/**
+******************************************************************************
+*  @brief   Macro to put a code into the bitstream using 0th order
+*           exponential Golomb encoding for unsigned numbers
+*
+*  The status of ih264e_put_uev() is stored in ret_val. If it is not
+*  IH264E_SUCCESS, the macro executes 'return ret_val;' in the enclosing
+*  function.
+******************************************************************************
+*/
+#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+    {                                                              \
+        ENTROPY_TRACE(syntax_string, code_val);                    \
+        ret_val = ih264e_put_uev((ps_bitstrm), (code_val));        \
+        if(ret_val != IH264E_SUCCESS)                              \
+        {                                                          \
+            return ret_val;                                        \
+        }                                                          \
+    }
+/**
+******************************************************************************
+*  @brief   Macro to put a code into the bitstream using 0th order
+*           exponential Golomb encoding for signed numbers
+*
+*  The status of ih264e_put_sev() is stored in ret_val. If it is not
+*  IH264E_SUCCESS, the macro executes 'return ret_val;' in the enclosing
+*  function.
+******************************************************************************
+*/
+#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+    {                                                              \
+        ENTROPY_TRACE(syntax_string, code_val);                    \
+        ret_val = ih264e_put_sev((ps_bitstrm), (code_val));        \
+        if(ret_val != IH264E_SUCCESS)                              \
+        {                                                          \
+            return ret_val;                                        \
+        }                                                          \
+    }
+
+/**
+******************************************************************************
+*  @brief   Macro to set active entropy threads to zero and return
+*           in case of errors
+*
+*  If ps_entropy->i4_error_code is not IH264E_SUCCESS, the macro invokes
+*  DATA_SYNC(), clears ps_codec->au4_entropy_thread_active[ctxt_sel] and
+*  returns the error code from the enclosing function.
+*
+*  The macro arguments are parenthesised so that expression arguments expand
+*  correctly. The expansion is a bare 'if' statement: do not use it as the
+*  body of an unbraced if/else, since a following 'else' would bind to it.
+******************************************************************************
+*/
+#define RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel) \
+    if((ps_entropy)->i4_error_code != IH264E_SUCCESS)           \
+    {                                                           \
+        DATA_SYNC();                                            \
+        (ps_codec)->au4_entropy_thread_active[(ctxt_sel)] = 0;  \
+        return (ps_entropy)->i4_error_code;                     \
+    }
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+extern WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm, WORD32 nal_unit_type,
+                                              WORD32 nal_ref_idc);
+
+extern WORD32 ih264e_generate_vui(bitstrm_t *ps_bitstrm, vui_t *ps_vui);
+
+extern IH264E_ERROR_T ih264e_generate_sei(bitstrm_t *ps_bitstrm, sei_params_t *ps_sei,
+                                          UWORD32 u4_insert_per_idr);
+
+extern IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm, WORD32 insert_fill_bytes);
+
+/**
+******************************************************************************
+*
+* @brief Generates SPS (Sequence Parameter Set)
+*
+* @par   Description
+*  This function generates Sequence Parameter Set header as per the spec
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   ps_sps
+*  pointer to structure containing SPS data
+*
+* @param[in]   nal_type
+*  NAL unit type supplied by the caller (NOTE(review): presumably selects the
+*  NAL unit header written for this SPS - confirm against the definition)
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 isvce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps, NAL_UNIT_TYPE_T nal_type);
+
+/**
+******************************************************************************
+*
+* @brief Generates PPS (Picture Parameter Set)
+*
+* @par   Description
+*  Generate Picture Parameter Set as per Section 7.3.2.2
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   ps_pps
+*  pointer to structure containing PPS data
+*
+* @param[in]   ps_sps
+*  pointer to structure containing SPS data (NOTE(review): presumably the SPS
+*  referred to by this PPS - confirm against the definition)
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 isvce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps);
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par   Description
+*  Generate Slice Header as per Section 7.3.3
+*
+* @param[inout]   ps_bitstrm
+*  pointer to bitstream context for generating slice header
+*
+* @param[in]   ps_slice_hdr
+*  pointer to slice header params
+*
+* @param[in]   ps_pps
+*  pointer to pps params referred by slice
+*
+* @param[in]   ps_sps
+*  pointer to sps params referred by slice
+*
+* @param[in]   u1_idr_flag
+*  NOTE(review): presumably non-zero when the slice belongs to an IDR picture -
+*  confirm against the definition
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 isvce_generate_slice_header(bitstrm_t *ps_bitstrm, slice_header_t *ps_slice_hdr,
+                                   pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_idr_flag);
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par   Description
+*  Populates sps structure for its use in header generation
+*
+* @param[in]   ps_codec
+*  pointer to encoder context
+*
+* @param[out]  ps_sps
+*  pointer to sps params that needs to be populated
+*
+* @param[in]   u1_sps_id
+* @param[in]   u1_profile_idc
+* @param[in]   ps_inp_buf
+* @param[in]   u1_spatial_layer_id
+*  NOTE(review): presumably the id of the SPS, the profile_idc to signal, the
+*  input buffer descriptor and the index of the spatial layer the SPS belongs
+*  to, respectively - confirm against the definition
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T isvce_populate_sps(isvce_codec_t *ps_codec, sps_t *ps_sps, UWORD8 u1_sps_id,
+                                  UWORD8 u1_profile_idc, isvce_inp_buf_t *ps_inp_buf,
+                                  UWORD8 u1_spatial_layer_id);
+
+/**
+******************************************************************************
+*
+* @brief Populates pps structure
+*
+* @par   Description
+*  Populates pps structure for its use in header generation
+*
+* @param[in]   ps_codec
+*  pointer to encoder context
+*
+* @param[out]  ps_pps
+*  pointer to pps params that needs to be populated
+*
+* @param[in]   u1_sps_id
+* @param[in]   u1_pps_id
+* @param[in]   u1_spatial_layer_id
+*  NOTE(review): presumably the id of the SPS referred to by the PPS, the id
+*  of the PPS itself and the index of the spatial layer the PPS belongs to,
+*  respectively - confirm against the definition
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T isvce_populate_pps(isvce_codec_t *ps_codec, pps_t *ps_pps, UWORD8 u1_sps_id,
+                                  UWORD8 u1_pps_id, UWORD8 u1_spatial_layer_id);
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par   Description
+*  Populates slice header structure for its use in header generation
+*
+* @param[in]  ps_proc
+*  pointer to proc context
+*
+* @param[out]  ps_slice_hdr
+*  pointer to slice header structure that needs to be populated
+*
+* @param[in]  ps_pps
+*  pointer to pps params structure referred by the slice
+*
+* @param[in]   ps_sps
+*  pointer to sps params referred by the pps
+*
+* @param[in]   u1_is_idr
+*  NOTE(review): presumably non-zero when the slice belongs to an IDR picture -
+*  confirm against the definition
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 isvce_populate_slice_header(isvce_process_ctxt_t *ps_proc, slice_header_t *ps_slice_hdr,
+                                   pps_t *ps_pps, sps_t *ps_sps, UWORD8 u1_is_idr);
+
+extern WORD32 isvce_populate_svc_nalu_extension(isvce_process_ctxt_t *ps_proc,
+                                                svc_nalu_ext_t *ps_svc_nalu_ext,
+                                                NAL_UNIT_TYPE_T nalu_type, UWORD8 u1_idr_flag);
+
+extern WORD32 isvce_generate_svc_nalu_extension(bitstrm_t *ps_bitstrm,
+                                                svc_nalu_ext_t *ps_svc_nalu_ext, UWORD8 u1_nalu_id);
+
+extern WORD32 isvce_populate_svc_slice(isvce_process_ctxt_t *ps_proc,
+                                       svc_slice_header_t *ps_svc_slice_hdr, pps_t *ps_pps,
+                                       subset_sps_t *ps_subset_sps,
+                                       svc_nalu_ext_t *ps_svc_nalu_ext);
+
+extern WORD32 isvce_populate_subset_sps(isvce_codec_t *ps_codec, subset_sps_t *ps_subset_sps,
+                                        UWORD8 u1_sps_id, isvce_inp_buf_t *ps_inp_buf,
+                                        UWORD8 u1_spatial_layer_id);
+
+extern WORD32 isvce_generate_prefix_nal(bitstrm_t *ps_bitstrm, svc_nalu_ext_t *ps_svc_nalu_ext,
+                                        slice_header_t *ps_slice_header,
+                                        UWORD8 u1_max_num_ref_frames, UWORD8 u1_num_spatial_layers);
+
+extern WORD32 isvce_generate_slice_header_svc(bitstrm_t *ps_bitstrm, pps_t *ps_pps,
+                                              svc_nalu_ext_t *ps_svc_nalu_ext,
+                                              svc_slice_header_t *ps_svc_slice_hdr,
+                                              subset_sps_t *ps_subset_sps);
+
+extern WORD32 isvce_generate_subset_sps(bitstrm_t *ps_bitstrm, subset_sps_t *ps_subset_sps);
+
+#endif
--- a/encoder/svc/isvce_error.h
+++ b/encoder/svc/isvce_error.h
@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_error.h
+*
+* @brief
+*  SVC specific error codes
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ERROR_H_
+#define _ISVCE_ERROR_H_
+
+#include "ih264e_error.h"
+
+typedef enum ISVCE_ERRORS_T
+{
+    /** Invalid SVC params; the codes below occupy IH264E_CODEC_ERROR_START + 0x100..0x107 */
+    IH264E_INVALID_SVC_PARAMS = IH264E_CODEC_ERROR_START + 0x100,
+
+    /** Invalid num_temporal_layers */
+    IH264E_INVALID_NUM_TEMPORAL_LAYERS = IH264E_CODEC_ERROR_START + 0x101,
+
+    /** Invalid num_spatial_layers */
+    IH264E_INVALID_NUM_SPATIAL_LAYERS = IH264E_CODEC_ERROR_START + 0x102,
+
+    /** Invalid spatial_res_ratio */
+    IH264E_INVALID_SPATIAL_RES_RATIO = IH264E_CODEC_ERROR_START + 0x103,
+
+    /** Weighted prediction not supported */
+    IH264E_WEIGHTED_PRED_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x104,
+
+    /** CABAC entropy mode not supported for SVC */
+    IH264E_CABAC_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x105,
+
+    /** Invalid input dimensions */
+    IH264E_INVALID_SVC_INPUT_DIMENSIONS = IH264E_CODEC_ERROR_START + 0x106,
+
+    /** Invalid init QP */
+    IH264E_INVALID_DYN_INIT_QP = IH264E_CODEC_ERROR_START + 0x107,
+
+} ISVCE_ERRORS_T;
+
+#endif
--- a/encoder/svc/isvce_fmt_conv.c
+++ b/encoder/svc/isvce_fmt_conv.c
@ -0,0 +1,145 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_fmt_conv.c
+*
+* @brief
+*  Contains functions for format conversion or frame copy of output buffer
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_fmt_conv()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+/* Dependencies of ih264_buf_mgr.h */
+/* Dependencies of ih264_list.h */
+#include "ih264_error.h"
+/* Dependencies of ih264_common_tables.h */
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+/* Dependencies of ih264e_cabac_structs.h */
+#include "ih264_cabac_tables.h"
+/* Dependencies of ime_structs.h */
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+/* Dependencies of ih264e_structs.h */
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+/* Dependencies of ih264e_bitstream.h */
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264e_cabac_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ime_statistics.h"
+#include "ime_structs.h"
+/* Dependencies of 'ih264e_utils.h' */
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264e_fmt_conv.h"
+#include "isvce_structs.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Copies rows of the highest spatial layer of an AU buffer to the caller's
+*  output planes, de-interleaving chroma when 420P output is requested
+*
+* @par Description:
+*  The source is ps_pic->ps_layer_yuv_buf_props[u1_num_spatial_layers - 1];
+*  component 0 is read as luma and component 1 as interleaved chroma. The
+*  output layout is selected by ps_codec->s_cfg.e_recon_color_fmt:
+*   - IV_YUV_420SP_UV / IV_YUV_420SP_VU : ih264e_fmt_conv_420sp_to_420sp()
+*   - IV_YUV_420P                       : ih264e_fmt_conv_420sp_to_420p()
+*  For any other format nothing is written and IH264E_SUCCESS is still
+*  returned.
+*
+* @param[in] ps_codec
+*  Codec context; supplies display width and colour formats
+*
+* @param[in] ps_pic
+*  AU buffer holding the per-layer source planes
+*
+* @param[out] pu1_y_dst
+*  Base of the destination luma plane
+*
+* @param[out] pu1_u_dst
+*  Base of the destination U plane (also used as the interleaved chroma
+*  plane for the 420SP outputs)
+*
+* @param[out] pu1_v_dst
+*  Base of the destination V plane (used for 420P output only)
+*
+* @param[in] u4_dst_y_strd
+*  Destination luma stride
+*
+* @param[in] u4_dst_uv_strd
+*  Destination chroma stride
+*
+* @param[in] cur_row
+*  First luma row to process; chroma starts at cur_row / 2
+*
+* @param[in] num_rows
+*  Number of rows handed to the copy routine; 0 makes the call a no-op
+*
+* @returns  IH264E_SUCCESS (this routine has no failure path)
+*
+* @remarks
+*  Source strides are taken from the layer's own buffer descriptors, so the
+*  row offset applied here and the per-row step used by the copy routines
+*  always agree.
+*  NOTE(review): both 420SP_UV and 420SP_VU take the same copy path; confirm
+*  whether a U/V swap is expected for 420SP_VU output.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst,
+                              UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd,
+                              UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows)
+{
+    UWORD8 *pu1_luma, *pu1_chroma;
+    UWORD8 *pu1_y_src, *pu1_uv_src;
+    UWORD8 *pu1_y_dst_tmp, *pu1_u_dst_tmp, *pu1_v_dst_tmp;
+    WORD32 src_y_strd, src_uv_strd;
+    WORD32 wd, is_u_first;
+
+    /* Output is always produced from the highest spatial layer */
+    WORD32 layer_id = ps_pic->u1_num_spatial_layers - 1;
+
+    /* Nothing to copy */
+    if(0 == num_rows)
+    {
+        return IH264E_SUCCESS;
+    }
+
+    pu1_luma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].pv_data;
+    pu1_chroma = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].pv_data;
+
+    src_y_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[0].i4_data_stride;
+    src_uv_strd = ps_pic->ps_layer_yuv_buf_props[layer_id].as_component_bufs[1].i4_data_stride;
+
+    wd = ps_codec->s_cfg.u4_disp_wd;
+    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;
+
+    /* Advance to the first row to be processed; chroma has half the rows */
+    pu1_y_src = pu1_luma + cur_row * src_y_strd;
+    pu1_uv_src = pu1_chroma + (cur_row / 2) * src_uv_strd;
+
+    pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
+    pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
+    pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;
+
+    if((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
+       (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
+    {
+        /* Semi-planar output: the U pointer addresses the interleaved plane */
+        ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, pu1_u_dst_tmp, wd,
+                                       num_rows, src_y_strd, src_uv_strd, u4_dst_y_strd,
+                                       u4_dst_uv_strd);
+    }
+    else if(IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
+    {
+        /* Planar output: chroma is split into separate U and V planes */
+        ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, pu1_u_dst_tmp,
+                                      pu1_v_dst_tmp, wd, num_rows, src_y_strd, src_uv_strd,
+                                      u4_dst_y_strd, u4_dst_uv_strd, is_u_first, 0);
+    }
+
+    return IH264E_SUCCESS;
+}
--- a/encoder/svc/isvce_fmt_conv.h
+++ b/encoder/svc/isvce_fmt_conv.h
@ -0,0 +1,48 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_fmt_conv.h
+*
+* @brief
+*  The file contains extern declarations of color space conversion routines
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_FMT_CONV_H_
+#define _ISVCE_FMT_CONV_H_
+
+#include "ih264e_fmt_conv.h"
+#include "isvce_structs.h"
+
+/**
+ * Copies num_rows rows, starting at luma row cur_row (chroma row cur_row / 2),
+ * from the highest spatial layer held in ps_pic to the destination planes.
+ * The output layout follows ps_codec->s_cfg.e_recon_color_fmt: 420SP output
+ * is written through pu1_y_dst / pu1_u_dst, 420P output through pu1_y_dst /
+ * pu1_u_dst / pu1_v_dst. A num_rows of 0 is a no-op. The definition returns
+ * IH264E_SUCCESS on every path.
+ */
+IH264E_ERROR_T isvce_fmt_conv(isvce_codec_t *ps_codec, svc_au_buf_t *ps_pic, UWORD8 *pu1_y_dst,
+                              UWORD8 *pu1_u_dst, UWORD8 *pu1_v_dst, UWORD32 u4_dst_y_strd,
+                              UWORD32 u4_dst_uv_strd, WORD32 cur_row, WORD32 num_rows);
+
+#endif
--- a/encoder/svc/isvce_function_selector_generic.c
+++ b/encoder/svc/isvce_function_selector_generic.c
@ -0,0 +1,314 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_function_selector_generic.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_init_function_ptr_generic
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "isvc_defs.h"
+#include "ih264_size_defs.h"
+#include "isvce_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_cabac_tables.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_platform_macros.h"
+#include "isvce_cabac.h"
+#include "isvce_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "isvce_cavlc.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+#include "isvce_me.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initializes the function pointers of the codec context with the
+* generic (non ISA-specific) implementations
+*
+* @par Description: fills the intra prediction, transform/quant, inverse
+* transform, core coding, deblocking, entropy (MB syntax), padding, inter
+* prediction, SAD/ME, memory, colour conversion and half-pel function pointers
+* held in the codec context, in its ISA dependent function tables and in the
+* ME context of every process context
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks Every variant slot of the transform/quant and inverse transform
+* tables is pointed at the same generic routine. apf_luma_energy_compaction[2]
+* is not assigned by this routine.
+*
+*******************************************************************************
+*/
+void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec)
+{
+    WORD32 i = 0;
+
+    /* curr proc ctxt */
+    isvce_process_ctxt_t *ps_proc = NULL;
+    isvce_me_ctxt_t *ps_me_ctxt = NULL;
+    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
+    enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
+    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
+    mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8 (same mode ordering as the 4x4 table above) */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_8_l[1] = ih264_intra_pred_luma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane;
+
+    /* Init luma forward transform fn ptr.
+     * The asserts guard the hard-coded indices used below against a change
+     * in the number of table variants. */
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8) /
+            sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0])) ==
+           NUM_RESI_TRANS_QUANT_VARIANTS);
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4) /
+            sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0])) ==
+           NUM_RESI_TRANS_QUANT_VARIANTS);
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4) /
+            sizeof(ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0])) ==
+           NUM_RESI_TRANS_QUANT_VARIANTS);
+
+    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[0] = isvc_resi_trans_quant_8x8;
+    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[0] = isvc_resi_trans_quant_4x4;
+    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[0] = isvc_resi_trans_quant_chroma_4x4;
+    ps_enc_loop_fxns->apf_resi_trans_quant_8x8[1] = isvc_resi_trans_quant_8x8;
+    ps_enc_loop_fxns->apf_resi_trans_quant_4x4[1] = isvc_resi_trans_quant_4x4;
+    ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[1] = isvc_resi_trans_quant_chroma_4x4;
+    ps_enc_loop_fxns->pf_hadamard_quant_4x4 = isvc_hadamard_quant_4x4;
+    ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv = isvc_hadamard_quant_2x2_uv;
+
+    /* Init inverse transform fn ptr */
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8) /
+            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0])) == NUM_IQ_IT_RECON_VARIANTS);
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4) /
+            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0])) == NUM_IQ_IT_RECON_VARIANTS);
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc) /
+            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0])) ==
+           NUM_IQ_IT_RECON_VARIANTS);
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4) /
+            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0])) ==
+           NUM_IQ_IT_RECON_VARIANTS);
+    ASSERT((sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc) /
+            sizeof(ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0])) ==
+           NUM_IQ_IT_RECON_VARIANTS);
+
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] = isvc_iquant_itrans_recon_4x4;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = isvc_iquant_itrans_recon_4x4_dc;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] = isvc_iquant_itrans_recon_chroma_4x4;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] = isvc_iquant_itrans_recon_4x4;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] = isvc_iquant_itrans_recon_4x4_dc;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] = isvc_iquant_itrans_recon_chroma_4x4;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] = isvc_iquant_itrans_recon_chroma_4x4;
+    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
+        isvc_iquant_itrans_recon_chroma_4x4_dc;
+    ps_enc_loop_fxns->pf_zcbf_iquant_itrans_recon_4x4 = isvc_zcbf_iquant_itrans_recon_4x4;
+    ps_enc_loop_fxns->pf_chroma_zcbf_iquant_itrans_recon_4x4 =
+        isvc_chroma_zcbf_iquant_itrans_recon_4x4;
+
+    ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
+    ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv;
+
+    /* Init fn ptr luma core coding (index 2 is intentionally left untouched
+     * here; NOTE(review): presumably the intra 8x8 slot - confirm) */
+    ps_enc_loop_fxns->apf_luma_energy_compaction[0] = isvce_code_luma_intra_macroblock_16x16;
+    ps_enc_loop_fxns->apf_luma_energy_compaction[1] = isvce_code_luma_intra_macroblock_4x4;
+    ps_enc_loop_fxns->apf_luma_energy_compaction[3] = isvce_code_luma_inter_macroblock_16x16;
+
+    /* Init fn ptr chroma core coding */
+    ps_enc_loop_fxns->apf_chroma_energy_compaction[0] = isvce_code_chroma_intra_macroblock_8x8;
+    ps_enc_loop_fxns->apf_chroma_energy_compaction[1] = isvce_code_chroma_inter_macroblock_8x8;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
+
+    /* write mb syntax layer, indexed by [entropy mode][slice type] */
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = isvce_write_islice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = isvce_write_pslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = isvce_write_bslice_mb_cavlc;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = isvce_write_islice_mb_cabac;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = isvce_write_pslice_mb_cabac;
+    ps_codec->pf_write_mb_syntax_layer[CABAC][BSLICE] = isvce_write_bslice_mb_cabac;
+
+    /* Padding Functions */
+    ps_codec->pf_pad_top = ih264_pad_top;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
+
+    /* Inter pred leaf level functions */
+    ps_inter_pred_fxns->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy;
+    ps_inter_pred_fxns->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz;
+    ps_inter_pred_fxns->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert;
+    ps_inter_pred_fxns->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_inter_pred_fxns->pf_inter_pred_chroma = ih264_inter_pred_chroma;
+
+    /* sad me level functions (codec level) */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;
+
+    /* memory handling operations */
+    ps_mem_fxns->pf_mem_cpy = ih264_memcpy;
+    ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
+    ps_mem_fxns->pf_mem_set = ih264_memset;
+    ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8;
+    ps_mem_fxns->pf_copy_2d = isvc_copy_2d;
+    ps_mem_fxns->pf_memset_2d = isvc_memset_2d;
+    ps_mem_fxns->pf_16bit_interleaved_copy = isvc_16bit_interleaved_copy;
+    ps_mem_fxns->pf_16bit_interleaved_memset = isvc_16bit_interleaved_memset;
+    ps_mem_fxns->pf_nonzero_checker = isvc_is_nonzero_blk;
+
+    /* sad me level functions (per process context ME state) */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        ps_proc = &ps_codec->as_process[i];
+
+        ps_me_ctxt = &ps_proc->s_me_ctxt;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter;
+    }
+
+    /* intra mode eval -encoder level function */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* csc */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation function - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;
+
+    /* ME compute */
+    ps_codec->apf_compute_me[PSLICE] = &isvce_compute_me_single_reflist;
+    ps_codec->apf_compute_me[BSLICE] = &isvce_compute_me_multi_reflist;
+
+    /* skip decision */
+    ps_codec->apf_find_skip_params_me[PSLICE] = &isvce_find_pskip_params_me;
+    ps_codec->apf_find_skip_params_me[BSLICE] = &isvce_find_bskip_params_me;
+}
--- a/encoder/svc/isvce_globals.c
+++ b/encoder/svc/isvce_globals.c
@ -0,0 +1,48 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_globals.c
+*
+* @brief
+*  Contains definitions of global variables used across the encoder
+*
+* @author
+*  ittiam
+*
+* @par List of functions
+*
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+
+/* Raster to z scan map.
+ * Lookup table of MAX_TU_IN_MB entries (16 values are listed). Per its name,
+ * entry i is the z-scan index of the block whose raster index is i. The
+ * permutation is its own inverse (e.g. 2 <-> 4, 3 <-> 5, 10 <-> 12,
+ * 11 <-> 13), so the same table also converts z-scan back to raster. */
+const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB] = {0, 1, 4,  5,  2,  3,  6,  7,
+                                                       8, 9, 12, 13, 10, 11, 14, 15};
--- a/encoder/svc/isvce_globals.h
+++ b/encoder/svc/isvce_globals.h
@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_globals.h
+*
+* @brief
+*  Contains declarations of global variables for H264 encoder
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_GLOBALS_H_
+#define _ISVCE_GLOBALS_H_
+
+#include "ih264e_globals.h"
+
+/* Raster <-> z-scan index lookup table; defined in isvce_globals.c */
+extern const UWORD8 gau1_raster_to_zscan_map[MAX_TU_IN_MB];
+
+#endif
--- a/encoder/svc/isvce_ibl_eval.c
+++ b/encoder/svc/isvce_ibl_eval.c
--- a/encoder/svc/isvce_ibl_eval.h
+++ b/encoder/svc/isvce_ibl_eval.h
@ -0,0 +1,105 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ibl_eval.h
+*
+* @brief
+*  Contains function declarations for functions defined in
+*  isvce_ibl_eval.c
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _ISVCE_IBL_EVAL_H_
+#define _ISVCE_IBL_EVAL_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_macros.h"
+#include "ih264_debug.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvc_intra_resample.h"
+#include "isvce_structs.h"
+#include "isvce_structs.h"
+
+/* Element counts of the temporary reference-array buffers.
+ * NOTE(review): LUMA and CB are both sized REF_ARRAY_WIDTH squared while CR
+ * uses DYADIC_REF_W_C * DYADIC_REF_H_C - confirm the CB/CR asymmetry is
+ * intended. */
+#define TEMP_BUF_SIZE_LUMA (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH)
+#define TEMP_BUF_SIZE_CB (REF_ARRAY_WIDTH * REF_ARRAY_WIDTH)
+#define TEMP_BUF_SIZE_CR (DYADIC_REF_W_C * DYADIC_REF_H_C)
+
+/* Dimensions and element count of the intermediate interpolation buffer:
+ * 48 columns by (MB_SIZE + 4) rows */
+#define INTERMEDIATE_BUFF_WIDTH 48
+#define INTERMEDIATE_BUFF_HEIGHT (MB_SIZE + 4)
+#define TEMP_INTERPOLATION_BUF_SIZE (INTERMEDIATE_BUFF_WIDTH * INTERMEDIATE_BUFF_HEIGHT)
+
+/* Structs */
+/* Part of svc_intra_pred_ctxt_t holding an opaque state pointer */
+typedef struct intra_pred_constants_t
+{
+    /* Opaque handle to module-private state.
+     * NOTE(review): presumably an intra_pred_state_t from
+     * isvce_ibl_private_defs.h - confirm in isvce_ibl_eval.c */
+    void *pv_state;
+} intra_pred_constants_t;
+
+/* Part of svc_intra_pred_ctxt_t holding the module's output buffer */
+typedef struct intra_pred_outputs_t
+{
+    /* YUV buffer descriptor for the prediction (per the field name) */
+    yuv_buf_props_t s_pred_buf;
+} intra_pred_outputs_t;
+
+/* Part of svc_intra_pred_ctxt_t holding the inputs that vary between calls */
+typedef struct intra_pred_variables_t
+{
+    /* Inter-layer prediction data (per the type name) */
+    svc_ilp_data_t *ps_svc_ilp_data;
+
+    /* Position of the macroblock being processed
+     * NOTE(review): units (MBs vs pixels) not visible here - confirm */
+    coordinates_t s_mb_pos;
+
+    /* Id of the spatial layer being processed */
+    UWORD8 u1_spatial_layer_id;
+} intra_pred_variables_t;
+
+/* SVC intra prediction context: groups the module's state handle,
+ * its per-call inputs and its outputs */
+typedef struct svc_intra_pred_ctxt_t
+{
+    /* State handle (see intra_pred_constants_t) */
+    intra_pred_constants_t s_intra_pred_constants;
+
+    /* Per-call inputs (see intra_pred_variables_t) */
+    intra_pred_variables_t s_intra_pred_variables;
+
+    /* Outputs (see intra_pred_outputs_t) */
+    intra_pred_outputs_t s_intra_pred_outputs;
+
+} svc_intra_pred_ctxt_t;
+
+/* Returns a size for the SVC intra prediction context given the layer count,
+ * inter-layer resolution ratio and picture dimensions.
+ * NOTE(review): presumably in bytes and for use in memory-record sizing -
+ * confirm in isvce_ibl_eval.c */
+extern UWORD32 isvce_get_svc_intra_pred_ctxt_size(UWORD8 u1_num_spatial_layers,
+                                                  DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
+                                                  UWORD32 u4_ht);
+
+/* Initializes the intra prediction context(s) of ps_codec using the memory
+ * described by ps_mem_rec */
+extern void isvce_intra_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
+
+/* Updates the IBL information for the MB at (i4_mb_x, i4_mb_y) of the given
+ * spatial layer.
+ * NOTE(review): u1_base_mode_flag carries an unsigned 'u1' prefix but is
+ * declared WORD8 - confirm against the definition before changing either */
+extern void isvce_update_ibl_info(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt,
+                                  UWORD8 u1_num_spatial_layers, UWORD8 u1_spatial_layer_id,
+                                  UWORD16 u2_mb_type, WORD32 i4_mb_x, WORD32 i4_mb_y,
+                                  WORD8 u1_base_mode_flag);
+
+/* Evaluates the IBL mode for the process context ps_proc */
+extern void isvce_evaluate_IBL_mode(isvce_process_ctxt_t *ps_proc);
+
+/* Pads the MB mode buffer of the given spatial layer (per the function name;
+ * see intra_pred_layer_state_t::pi1_mb_mode for the presumed target) */
+extern void isvce_pad_mb_mode_buf(svc_intra_pred_ctxt_t *ps_intra_pred_ctxt,
+                                  UWORD8 u1_spatial_layer_id, UWORD8 u1_num_spatial_layers,
+                                  DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht);
+
+#endif
--- a/encoder/svc/isvce_ibl_private_defs.h
+++ b/encoder/svc/isvce_ibl_private_defs.h
@ -0,0 +1,94 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ibl_private_defs.h
+*
+* @brief
+*  Contains datatype and macro definitions used exclusively in
+*  SVC intra prediction (IBL evaluation)
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_IBL_PRIVATE_DEFS_H_
+#define _ISVCE_IBL_PRIVATE_DEFS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+#include "isvc_intra_resample.h"
+
+/* Structs */
+/* Per-MB intra prediction state; intra_pred_layer_state_t keeps separate
+ * luma and chroma arrays of these.
+ * NOTE(review): field meanings below are inferred from names - confirm
+ * against isvce_ibl_eval.c */
+typedef struct intra_pred_mb_state_t
+{
+    /* Offsets associated with this MB */
+    coordinates_t s_offsets;
+
+    /* Dimensions of the reference array */
+    coordinates_t s_ref_array_dims;
+
+    /* Reference array positions along x */
+    WORD32 *pi4_ref_array_positions_x;
+
+    /* Reference array positions along y */
+    WORD32 *pi4_ref_array_positions_y;
+
+    /* Reference array phases */
+    coordinates_t *ps_ref_array_phases;
+
+    /* Minimum position */
+    coordinates_t s_min_pos;
+
+    /* Maximum position */
+    coordinates_t s_max_pos;
+
+} intra_pred_mb_state_t;
+
+/* Per-spatial-layer intra prediction state; intra_pred_state_t holds an
+ * array of these, one per spatial layer */
+typedef struct intra_pred_layer_state_t
+{
+    /* Resampler properties for luma */
+    layer_resampler_props_t *ps_luma_props;
+
+    /* Resampler properties for chroma */
+    layer_resampler_props_t *ps_chroma_props;
+
+    /* Per-MB states for luma */
+    intra_pred_mb_state_t *ps_luma_mb_states;
+
+    /* Per-MB states for chroma */
+    intra_pred_mb_state_t *ps_chroma_mb_states;
+
+    /* MB mode buffer; rows are i4_mb_mode_stride apart
+     * NOTE(review): presumably one entry per MB - confirm */
+    WORD8 *pi1_mb_mode;
+
+    /* Stride of pi1_mb_mode */
+    WORD32 i4_mb_mode_stride;
+
+    /* buffer to store the reference
+       layer data before intra sampling */
+    UWORD8 *pu1_refarray_buffer;
+
+    /* Reference array buffer for Cb */
+    UWORD8 *pu1_refarray_cb;
+
+    /* Reference array buffer for Cr */
+    UWORD8 *pu1_refarray_cr;
+
+    /* Scratch buffer used during interpolation (per the field name) */
+    WORD32 *pi4_temp_interpolation_buffer;
+
+} intra_pred_layer_state_t;
+
+/* Top-level private state of the intra prediction module */
+typedef struct intra_pred_state_t
+{
+    /* Array of size numSpatialLayers; one intra_pred_layer_state_t per
+     * spatial layer */
+    intra_pred_layer_state_t *ps_layer_state;
+
+} intra_pred_state_t;
+
+#endif
--- a/encoder/svc/isvce_ilp_mv.c
+++ b/encoder/svc/isvce_ilp_mv.c
@ -0,0 +1,737 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ilp_mv.c
+*
+* @brief
+*  Contains functions used for deriving inter_layer MV's
+*
+*******************************************************************************
+*/
+#include <stdint.h>
+#include <math.h>
+#include <stdbool.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "isvc_macros.h"
+#include "isvc_defs.h"
+#include "isvce_defs.h"
+#include "isvce_structs.h"
+#include "isvce_ilp_mv_private_defs.h"
+#include "isvce_ilp_mv.h"
+#include "isvce_ilp_mv_utils.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns size of buffers for storing ILP MV ctxt
+*
+* @description
+*  The size covers MAX_PROCESS_CTXT instances each of 'svc_ilp_mv_ctxt_t' and
+*  'ilp_mv_state_t', one 'ilp_mv_layer_state_t' per spatial layer, and one
+*  'ilp_mv_mb_state_t' per MB of every layer except layer 0. Layer dimensions
+*  are derived from the input dimensions by dividing by 'd_spatial_res_ratio'
+*  once for every layer below the topmost one.
+*
+* @param[in] u1_num_spatial_layers
+*  Num Spatial Layers
+*
+* @param[in] d_spatial_res_ratio
+*  Resolution Ratio b/w spatial layers
+*
+* @param[in] u4_wd
+*  Input Width
+*
+* @param[in] u4_ht
+*  Input Height
+*
+* @returns  Size of buffers in bytes; 0 when there is only one spatial layer
+*
+*******************************************************************************
+*/
+UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
+                                   UWORD32 u4_wd, UWORD32 u4_ht)
+{
+    UWORD32 u4_size = 0;
+
+    if(u1_num_spatial_layers > 1)
+    {
+        WORD32 i;
+
+        u4_size += MAX_PROCESS_CTXT * sizeof(svc_ilp_mv_ctxt_t);
+        u4_size += MAX_PROCESS_CTXT * sizeof(ilp_mv_state_t);
+
+        u4_size += u1_num_spatial_layers * sizeof(ilp_mv_layer_state_t);
+
+        for(i = u1_num_spatial_layers - 1; i >= 1; i--)
+        {
+            /* The conversion to WORD32 has to happen only after the 0.99 */
+            /* rounding term is added. isvce_ilp_mv_ctxt_init() derives the */
+            /* layer dimensions this way; truncating the width any earlier */
+            /* can yield fewer MBs than init carves out of the buffer. */
+            WORD32 i4_layer_luma_wd =
+                ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
+            WORD32 i4_layer_luma_ht =
+                ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
+            WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);
+
+            u4_size += i4_layer_luma_mbs * sizeof(ilp_mv_mb_state_t);
+        }
+    }
+
+    return u4_size;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  For one MB, derives the reference layer position of each of its
+*  MAX_PU_IN_MB_COL x MAX_PU_IN_MB_ROW sub-blocks
+*
+* @description
+*  The sample at offset (4 * col + 1, 4 * row + 1) inside the MB is scaled with
+*  'u4_scale_x/y' and rounded-shifted by 'u4_shift_x/y', then clamped to the
+*  reference layer dimensions. The clamped position is stored in
+*  'as_pu_positions'; the same position divided by MB_SIZE is stored in
+*  'as_mb_positions'.
+*
+*******************************************************************************
+*/
+static FORCEINLINE void isvce_ref_layer_pu_and_mb_pos_init(layer_resampler_props_t *ps_layer_props,
+                                                           ilp_mv_mb_state_t *ps_mb_state,
+                                                           coordinates_t *ps_mb_pos,
+                                                           UWORD32 u4_ref_wd, UWORD32 u4_ref_ht,
+                                                           UWORD8 u1_field_pic_flag,
+                                                           UWORD8 u1_field_mb_flag)
+{
+    UWORD32 u4_row, u4_col;
+
+    for(u4_row = 0; u4_row < MAX_PU_IN_MB_COL; u4_row++)
+    {
+        coordinates_t *ps_pu_pos_row = ps_mb_state->as_pu_positions[u4_row];
+        coordinates_t *ps_ref_mb_pos_row = ps_mb_state->as_mb_positions[u4_row];
+
+        UWORD32 u4_cur_y = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht +
+                           (4 * u4_row + 1) * (1 + u1_field_mb_flag - u1_field_pic_flag);
+        UWORD32 u4_ref_y =
+            (u4_cur_y * ps_layer_props->u4_scale_y + (1 << (ps_layer_props->u4_shift_y - 1))) >>
+            ps_layer_props->u4_shift_y;
+
+        /* Clamp to the last row of the reference layer */
+        if(u4_ref_y > (u4_ref_ht - 1))
+        {
+            u4_ref_y = u4_ref_ht - 1;
+        }
+
+        for(u4_col = 0; u4_col < MAX_PU_IN_MB_ROW; u4_col++)
+        {
+            UWORD32 u4_cur_x =
+                ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + 4 * u4_col + 1;
+            UWORD32 u4_ref_x =
+                (u4_cur_x * ps_layer_props->u4_scale_x + (1 << (ps_layer_props->u4_shift_x - 1))) >>
+                ps_layer_props->u4_shift_x;
+
+            /* Clamp to the last column of the reference layer */
+            if(u4_ref_x > (u4_ref_wd - 1))
+            {
+                u4_ref_x = u4_ref_wd - 1;
+            }
+
+            ps_pu_pos_row[u4_col].i4_abscissa = u4_ref_x;
+            ps_pu_pos_row[u4_col].i4_ordinate = u4_ref_y;
+
+            ps_ref_mb_pos_row[u4_col].i4_abscissa = (u4_ref_x / MB_SIZE);
+            ps_ref_mb_pos_row[u4_col].i4_ordinate = (u4_ref_y / MB_SIZE);
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialises the MV scale factors and the per-MB reference positions of one
+*  spatial layer
+*
+* @description
+*  'ps_layer_state->ps_props' and 'ps_layer_state->ps_mb_states' must already
+*  be set by the caller. The MV scale is stored with 16 fractional bits as
+*  (layer dim << 16) / (ref layer dim), rounded to nearest. MB states are
+*  filled in raster order.
+*
+* @param[in] ps_layer_state
+*  Layer state to initialise
+*
+* @param[in] d_spatial_res_ratio
+*  Resolution Ratio b/w this layer and its reference layer
+*
+* @param[in] u4_wd
+*  Layer Width
+*
+* @param[in] u4_ht
+*  Layer Height
+*
+*******************************************************************************
+*/
+static void isvce_ilp_mv_layer_state_init(ilp_mv_layer_state_t *ps_layer_state,
+                                          DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht)
+{
+    /* All field related flags are fixed to 0 in this function */
+    const UWORD8 u1_ref_layer_field_pic_flag = 0;
+    const UWORD8 u1_field_pic_flag = 0;
+    const UWORD8 u1_field_mb_flag = 0;
+
+    const UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio);
+    const UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag);
+    const UWORD32 u4_cur_wd = u4_wd;
+    const UWORD32 u4_cur_ht = u4_ht * (1 + u1_field_pic_flag);
+
+    ilp_mv_mb_state_t *ps_cur_mb_state = ps_layer_state->ps_mb_states;
+    layer_resampler_props_t *ps_props = ps_layer_state->ps_props;
+
+    const UWORD32 u4_num_mb_cols = u4_cur_wd / ps_props->u4_mb_wd;
+    const UWORD32 u4_num_mb_rows = u4_cur_ht / ps_props->u4_mb_ht;
+
+    UWORD32 u4_mb_x, u4_mb_y;
+
+    ps_layer_state->s_mv_scale.i4_abscissa = ((u4_cur_wd << 16) + (u4_ref_wd >> 1)) / u4_ref_wd;
+    ps_layer_state->s_mv_scale.i4_ordinate = ((u4_cur_ht << 16) + (u4_ref_ht >> 1)) / u4_ref_ht;
+
+    /* Walk the MB states in raster order, one entry per MB */
+    for(u4_mb_y = 0; u4_mb_y < u4_num_mb_rows; u4_mb_y++)
+    {
+        for(u4_mb_x = 0; u4_mb_x < u4_num_mb_cols; u4_mb_x++)
+        {
+            coordinates_t s_mb_pos = {u4_mb_x, u4_mb_y};
+
+            isvce_ref_layer_pu_and_mb_pos_init(ps_props, ps_cur_mb_state, &s_mb_pos, u4_ref_wd,
+                                               u4_ref_ht, u1_field_pic_flag, u1_field_mb_flag);
+
+            ps_cur_mb_state++;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize svc ilp buffers
+*
+* @description
+*  Carves the memory record into one 'svc_ilp_mv_ctxt_t' and one
+*  'ilp_mv_state_t' per process context. The array of layer states and the
+*  per-layer MB state arrays are carved only once, while handling process
+*  context 0, and are shared by all the other process contexts. When there is
+*  a single spatial layer, 'ps_svc_ilp_mv_ctxt' of every process context is set
+*  to NULL and the memory record is not touched.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_mem_rec
+*  Pointer to memory allocated for input buffers
+*
+*******************************************************************************
+*/
+void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
+{
+    WORD32 i, j;
+
+    const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]);
+    UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
+
+    if(u1_num_spatial_layers > 1)
+    {
+        ilp_mv_layer_state_t *ps_layer_states = NULL;
+        ilp_mv_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS];
+
+        DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
+        UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
+        UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+        /* Bytes still unclaimed in the record; used only by the ASSERTs below */
+        WORD64 i8_alloc_mem_size =
+            isvce_get_ilp_mv_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht);
+
+        for(i = 0; i < i4_num_proc_ctxts; i++)
+        {
+            ilp_mv_state_t *ps_ilp_mv_state;
+            svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt;
+
+            isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;
+
+            ps_ilp_mv_ctxt = ps_proc->ps_svc_ilp_mv_ctxt = (svc_ilp_mv_ctxt_t *) pu1_buf;
+            pu1_buf += sizeof(svc_ilp_mv_ctxt_t);
+            i8_alloc_mem_size -= sizeof(svc_ilp_mv_ctxt_t);
+
+            ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state = pu1_buf;
+            ps_ilp_mv_state = (ilp_mv_state_t *) pu1_buf;
+            pu1_buf += sizeof(ilp_mv_state_t);
+            i8_alloc_mem_size -= sizeof(ilp_mv_state_t);
+
+            if(0 == i)
+            {
+                /* Layer states are carved once and shared by every process ctxt */
+                ps_ilp_mv_state->ps_layer_state = (ilp_mv_layer_state_t *) pu1_buf;
+                ps_layer_states = ps_ilp_mv_state->ps_layer_state;
+                pu1_buf += u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]);
+                i8_alloc_mem_size -=
+                    u1_num_spatial_layers * sizeof(ps_ilp_mv_state->ps_layer_state[0]);
+            }
+            else
+            {
+                ps_ilp_mv_state->ps_layer_state = ps_layer_states;
+            }
+
+            ASSERT(i8_alloc_mem_size >= 0);
+
+            if(0 == i)
+            {
+                /* Layer 0 has no reference layer, hence no MB states for it */
+                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
+                {
+                    ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j];
+
+                    WORD32 i4_layer_luma_wd =
+                        ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
+                        0.99;
+                    WORD32 i4_layer_luma_ht =
+                        ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
+                        0.99;
+                    WORD32 i4_layer_luma_mbs =
+                        (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);
+
+                    ps_layer->ps_mb_states = (ilp_mv_mb_state_t *) pu1_buf;
+                    aps_luma_mb_states[j] = ps_layer->ps_mb_states;
+                    pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]);
+                    /* Account for exactly the bytes consumed above, so that the */
+                    /* ASSERT below is able to detect an overrun of the record */
+                    i8_alloc_mem_size -= i4_layer_luma_mbs * sizeof(ps_layer->ps_mb_states[0]);
+
+                    ASSERT(i8_alloc_mem_size >= 0);
+                    /* Asserts below verify that
+                     * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised
+                     */
+                    ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd ==
+                           MB_SIZE);
+
+                    ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
+
+                    isvce_ilp_mv_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd,
+                                                  i4_layer_luma_ht);
+                }
+            }
+            else
+            {
+                for(j = u1_num_spatial_layers - 1; j >= 1; j--)
+                {
+                    ilp_mv_layer_state_t *ps_layer = &ps_ilp_mv_state->ps_layer_state[j];
+
+                    ps_layer->ps_mb_states = aps_luma_mb_states[j];
+
+                    ps_layer->ps_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
+                }
+            }
+        }
+    }
+    else
+    {
+        for(i = 0; i < i4_num_proc_ctxts; i++)
+        {
+            ps_codec->as_process[i].ps_svc_ilp_mv_ctxt = NULL;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Scales a reference layer MV to the resolution of the current layer
+*
+* @description
+*  'ps_layer_state->s_mv_scale' holds the scale with 16 fractional bits; the
+*  product is rounded to nearest before the shift.
+*
+*******************************************************************************
+*/
+static FORCEINLINE void isvce_ilp_me_cand_mv_scale(isvce_enc_pu_mv_t *ps_mv,
+                                                   ilp_mv_layer_state_t *ps_layer_state)
+{
+    ps_mv->s_mv.i2_mvx =
+        (ps_mv->s_mv.i2_mvx * ps_layer_state->s_mv_scale.i4_abscissa + 32768) >> 16;
+    ps_mv->s_mv.i2_mvy =
+        (ps_mv->s_mv.i2_mvy * ps_layer_state->s_mv_scale.i4_ordinate + 32768) >> 16;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Builds an ME candidate from one reference layer MB in slot 'i4_num_cands'
+*  and keeps it only if it differs from every candidate stored before it
+*
+* @description
+*  The caller must have reset both MVs of slot 'i4_num_cands' beforehand. Only
+*  P16x16 and B16x16 reference MBs yield a candidate; for any other type the
+*  slot's MB type is set to INVALID_MB_TYPE and the count is left unchanged.
+*
+* @returns  Updated number of candidates
+*
+*******************************************************************************
+*/
+static FORCEINLINE WORD32 isvce_ilp_me_cand_add(ilp_me_cands_t *ps_cands,
+                                                isvce_mb_info_t *ps_ref_mb_info,
+                                                ilp_mv_layer_state_t *ps_layer_state,
+                                                WORD32 i4_num_cands)
+{
+    WORD32 k;
+
+    if((ps_ref_mb_info->u2_mb_type != P16x16) && (ps_ref_mb_info->u2_mb_type != B16x16))
+    {
+        ps_cands->e_mb_type[i4_num_cands] = INVALID_MB_TYPE;
+
+        return i4_num_cands;
+    }
+
+    ps_cands->e_mb_type[i4_num_cands] = ps_ref_mb_info->u2_mb_type;
+    ps_cands->ae_pred_mode[i4_num_cands] = ps_ref_mb_info->as_pu->u1_pred_mode;
+
+    /* A pred mode other than L0 means the L1 MV of the reference MB is in use */
+    if(ps_cands->ae_pred_mode[i4_num_cands] != L0)
+    {
+        ps_cands->as_mv[i4_num_cands][L1] = ps_ref_mb_info->as_pu->as_me_info[L1];
+
+        isvce_ilp_me_cand_mv_scale(&ps_cands->as_mv[i4_num_cands][L1], ps_layer_state);
+    }
+
+    /* A pred mode other than L1 means the L0 MV of the reference MB is in use */
+    if(ps_cands->ae_pred_mode[i4_num_cands] != L1)
+    {
+        ps_cands->as_mv[i4_num_cands][L0] = ps_ref_mb_info->as_pu->as_me_info[L0];
+
+        isvce_ilp_me_cand_mv_scale(&ps_cands->as_mv[i4_num_cands][L0], ps_layer_state);
+    }
+
+    /* Drop the new candidate if an identical one is already in the list. */
+    /* The slot is then reused by the next candidate. */
+    for(k = i4_num_cands - 1; k >= 0; k--)
+    {
+        if((ps_cands->e_mb_type[k] == ps_cands->e_mb_type[i4_num_cands]) &&
+           (ps_cands->ae_pred_mode[k] == ps_cands->ae_pred_mode[i4_num_cands]) &&
+           isvce_check_identical_mv(ps_cands->as_mv[k], ps_cands->as_mv[i4_num_cands],
+                                    ps_cands->ae_pred_mode[k]))
+        {
+            return i4_num_cands;
+        }
+    }
+
+    return i4_num_cands + 1;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Collects the inter layer MV candidates of the current MB for use by ME
+*
+* @description
+*  Pass 1 visits the reference layer MB mapped to each of the
+*  MAX_PU_IN_MB_COL x MAX_PU_IN_MB_ROW sub-blocks of the current MB; the count
+*  after this pass is stored in 'u4_num_ilp_mvs'. Pass 2 visits the
+*  MAX_ILP_MV_IN_NBR_RGN entries of 'gai1_nbr_ilp_mv_map'; each entry gives a
+*  neighbour MB offset (indices 0, 1) and a sub-block inside that neighbour
+*  (indices 2, 3). Neighbours outside the frame are skipped. The count after
+*  this pass is stored in 'u4_num_ilp_mvs_incl_nbrs'. Candidates are scaled to
+*  the current layer's resolution and duplicates are not stored.
+*
+* @param[in] ps_ilp_mv_ctxt
+*  Pointer to ILP MV ctxt; outputs are written to 's_ilp_mv_outputs'
+*
+*******************************************************************************
+*/
+static void isvce_get_ilp_mvs_for_me(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt)
+{
+    svc_layer_data_t *ps_ref_layer_data;
+    ilp_mv_layer_state_t *ps_layer_state;
+    ilp_mv_mb_state_t *ps_mb_state;
+    isvce_mb_info_t *ps_ref_mb_info;
+    coordinates_t s_frame_dims;
+    coordinates_t s_frame_dims_in_mbs;
+    coordinates_t s_ref_frame_dims;
+    coordinates_t s_ref_frame_dims_in_mbs;
+
+    WORD32 i, j;
+
+    ilp_mv_constants_t *ps_ilp_mv_constants = &ps_ilp_mv_ctxt->s_ilp_mv_constants;
+    ilp_mv_variables_t *ps_ilp_mv_variables = &ps_ilp_mv_ctxt->s_ilp_mv_variables;
+    ilp_mv_outputs_t *ps_ilp_mv_outputs = &ps_ilp_mv_ctxt->s_ilp_mv_outputs;
+    ilp_me_cands_t *ps_cands = &ps_ilp_mv_outputs->s_ilp_me_cands;
+    ilp_mv_state_t *ps_ilp_mv_state = (ilp_mv_state_t *) ps_ilp_mv_constants->pv_state;
+    svc_ilp_data_t *ps_svc_ilp_data = ps_ilp_mv_variables->ps_svc_ilp_data;
+    svc_au_data_t *ps_svc_au_data = ps_svc_ilp_data->ps_svc_au_data;
+    coordinates_t *ps_mb_pos = &ps_ilp_mv_variables->s_mb_pos;
+    const isvce_enc_pu_mv_t s_default_mv = {{0, 0}, -1};
+
+    UWORD8 u1_spatial_layer_id = ps_ilp_mv_variables->u1_spatial_layer_id;
+    WORD32 i4_num_ilp_mvs = 0;
+
+    s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
+    s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
+    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
+    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;
+    s_ref_frame_dims.i4_abscissa =
+        ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width;
+    s_ref_frame_dims.i4_ordinate =
+        ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height;
+    s_ref_frame_dims_in_mbs.i4_abscissa = s_ref_frame_dims.i4_abscissa / MB_SIZE;
+    s_ref_frame_dims_in_mbs.i4_ordinate = s_ref_frame_dims.i4_ordinate / MB_SIZE;
+
+    ps_ref_layer_data = &ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1];
+    ps_layer_state = &ps_ilp_mv_state->ps_layer_state[u1_spatial_layer_id];
+    ps_mb_state =
+        &ps_layer_state->ps_mb_states[ps_mb_pos->i4_abscissa +
+                                      ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa];
+
+    /* Pass 1: reference MBs mapped to the sub-blocks of the current MB */
+    for(i = 0; i < MAX_PU_IN_MB_COL; i++)
+    {
+        for(j = 0; j < MAX_PU_IN_MB_ROW; j++)
+        {
+            coordinates_t *ps_ref_mb_pos = &ps_mb_state->as_mb_positions[i][j];
+
+            ps_cands->as_mv[i4_num_ilp_mvs][L0] = s_default_mv;
+            ps_cands->as_mv[i4_num_ilp_mvs][L1] = s_default_mv;
+
+            ps_ref_mb_info =
+                &ps_ref_layer_data->ps_mb_info[ps_ref_mb_pos->i4_abscissa +
+                                               ps_ref_mb_pos->i4_ordinate *
+                                                   s_ref_frame_dims_in_mbs.i4_abscissa];
+
+            i4_num_ilp_mvs =
+                isvce_ilp_me_cand_add(ps_cands, ps_ref_mb_info, ps_layer_state, i4_num_ilp_mvs);
+        }
+    }
+
+    ps_cands->u4_num_ilp_mvs = i4_num_ilp_mvs;
+
+    /* Pass 2: reference MBs mapped to sub-blocks of the neighbouring MBs */
+    for(i = 0; i < MAX_ILP_MV_IN_NBR_RGN; i++)
+    {
+        WORD32 i4_nbr_mb_x = ps_mb_pos->i4_abscissa + gai1_nbr_ilp_mv_map[i][0];
+        WORD32 i4_nbr_mb_y = ps_mb_pos->i4_ordinate + gai1_nbr_ilp_mv_map[i][1];
+
+        ps_cands->as_mv[i4_num_ilp_mvs][L0] = s_default_mv;
+        ps_cands->as_mv[i4_num_ilp_mvs][L1] = s_default_mv;
+
+        if(i4_nbr_mb_x >= 0 && i4_nbr_mb_x < s_frame_dims_in_mbs.i4_abscissa &&
+           i4_nbr_mb_y >= 0 && i4_nbr_mb_y < s_frame_dims_in_mbs.i4_ordinate)
+        {
+            coordinates_t *ps_ref_mb_pos;
+
+            ps_mb_state =
+                &ps_layer_state
+                     ->ps_mb_states[i4_nbr_mb_x + i4_nbr_mb_y * s_frame_dims_in_mbs.i4_abscissa];
+
+            ps_ref_mb_pos = &ps_mb_state->as_mb_positions[gai1_nbr_ilp_mv_map[i][2]]
+                                                         [gai1_nbr_ilp_mv_map[i][3]];
+
+            ps_ref_mb_info =
+                &ps_ref_layer_data->ps_mb_info[ps_ref_mb_pos->i4_abscissa +
+                                               ps_ref_mb_pos->i4_ordinate *
+                                                   s_ref_frame_dims_in_mbs.i4_abscissa];
+
+            i4_num_ilp_mvs =
+                isvce_ilp_me_cand_add(ps_cands, ps_ref_mb_info, ps_layer_state, i4_num_ilp_mvs);
+        }
+    }
+
+    ps_cands->u4_num_ilp_mvs_incl_nbrs = i4_num_ilp_mvs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Derives the inter layer MV of the current MB and its ME candidates
+*
+* @description
+*  The MV is seeded from the reference layer MB mapped to sub-block [0][0],
+*  provided that MB is P16x16 or B16x16. The ME candidates are then derived
+*  via isvce_get_ilp_mvs_for_me(). Finally, the seed is kept only if the
+*  reference MBs mapped to all sub-blocks agree with it in MB type, pred mode
+*  and MVs; in that case it is scaled to the current layer's resolution. In
+*  all other cases 'e_mb_type' of the output is INVALID_MB_TYPE.
+*
+* @param[in] ps_ilp_mv_ctxt
+*  Pointer to ILP MV ctxt; outputs are written to 's_ilp_mv_outputs'
+*
+*******************************************************************************
+*/
+void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt)
+{
+    WORD32 i4_pu_row, i4_pu_col;
+
+    const isvce_enc_pu_mv_t s_invalid_mv = {{0, 0}, -1};
+
+    ilp_mv_variables_t *ps_vars = &ps_ilp_mv_ctxt->s_ilp_mv_variables;
+    ilp_mv_t *ps_out_mv = &ps_ilp_mv_ctxt->s_ilp_mv_outputs.s_ilp_mv;
+    ilp_mv_state_t *ps_state = (ilp_mv_state_t *) ps_ilp_mv_ctxt->s_ilp_mv_constants.pv_state;
+    svc_ilp_data_t *ps_ilp_data = ps_vars->ps_svc_ilp_data;
+    coordinates_t *ps_mb_pos = &ps_vars->s_mb_pos;
+
+    const UWORD8 u1_layer_id = ps_vars->u1_spatial_layer_id;
+    const WORD32 i4_wd = ps_ilp_data->ps_residual_bufs[u1_layer_id].u4_width;
+    const WORD32 i4_ref_wd = ps_ilp_data->ps_residual_bufs[u1_layer_id - 1].u4_width;
+    const WORD32 i4_wd_in_mbs = i4_wd / MB_SIZE;
+    const WORD32 i4_ref_wd_in_mbs = i4_ref_wd / MB_SIZE;
+
+    svc_layer_data_t *ps_ref_layer =
+        &ps_ilp_data->ps_svc_au_data->ps_svc_layer_data[u1_layer_id - 1];
+    ilp_mv_layer_state_t *ps_layer_state = &ps_state->ps_layer_state[u1_layer_id];
+    ilp_mv_mb_state_t *ps_mb_state =
+        &ps_layer_state
+             ->ps_mb_states[ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * i4_wd_in_mbs];
+    isvce_mb_info_t *ps_ref_mb;
+
+    ps_out_mv->as_mv[0][L0] = s_invalid_mv;
+    ps_out_mv->as_mv[0][L1] = s_invalid_mv;
+
+    /* Seed from the reference MB mapped to the first sub-block */
+    ps_ref_mb = &ps_ref_layer->ps_mb_info[ps_mb_state->as_mb_positions[0][0].i4_abscissa +
+                                          ps_mb_state->as_mb_positions[0][0].i4_ordinate *
+                                              i4_ref_wd_in_mbs];
+
+    if((P16x16 == ps_ref_mb->u2_mb_type) || (B16x16 == ps_ref_mb->u2_mb_type))
+    {
+        ps_out_mv->e_mb_type = ps_ref_mb->u2_mb_type;
+        ps_out_mv->ae_pred_mode[0] = ps_ref_mb->as_pu->u1_pred_mode;
+
+        if(ps_out_mv->ae_pred_mode[0] != L0)
+        {
+            ps_out_mv->as_mv[0][L1] = ps_ref_mb->as_pu->as_me_info[L1];
+        }
+
+        if(ps_out_mv->ae_pred_mode[0] != L1)
+        {
+            ps_out_mv->as_mv[0][L0] = ps_ref_mb->as_pu->as_me_info[L0];
+        }
+    }
+    else
+    {
+        ps_out_mv->e_mb_type = INVALID_MB_TYPE;
+    }
+
+    /* Function call to get non 16x16 ilp mvs for me candidates */
+    isvce_get_ilp_mvs_for_me(ps_ilp_mv_ctxt);
+
+    /* Encoder supports only 16x16 partition. Hence the seed is discarded as */
+    /* soon as the reference MB of any sub-block disagrees with it */
+    for(i4_pu_row = 0; i4_pu_row < MAX_PU_IN_MB_COL; i4_pu_row++)
+    {
+        for(i4_pu_col = 0; i4_pu_col < MAX_PU_IN_MB_ROW; i4_pu_col++)
+        {
+            coordinates_t *ps_ref_mb_pos = &ps_mb_state->as_mb_positions[i4_pu_row][i4_pu_col];
+
+            ps_ref_mb = &ps_ref_layer->ps_mb_info[ps_ref_mb_pos->i4_abscissa +
+                                                  ps_ref_mb_pos->i4_ordinate * i4_ref_wd_in_mbs];
+
+            if((ps_ref_mb->u2_mb_type != ps_out_mv->e_mb_type) ||
+               (ps_out_mv->ae_pred_mode[0] != ps_ref_mb->as_pu->u1_pred_mode) ||
+               !isvce_check_identical_mv(ps_out_mv->as_mv[0], ps_ref_mb->as_pu->as_me_info,
+                                         ps_out_mv->ae_pred_mode[0]))
+            {
+                ps_out_mv->as_mv[0][L0] = s_invalid_mv;
+                ps_out_mv->as_mv[0][L1] = s_invalid_mv;
+                ps_out_mv->e_mb_type = INVALID_MB_TYPE;
+
+                return;
+            }
+        }
+    }
+
+    if(INVALID_MB_TYPE == ps_out_mv->e_mb_type)
+    {
+        ps_out_mv->ae_pred_mode[0] = INVALID_PRED_MODE;
+
+        return;
+    }
+
+    /* Scale the surviving MVs; the scale carries 16 fractional bits */
+    if(ps_out_mv->ae_pred_mode[0] != L0)
+    {
+        isvce_enc_pu_mv_t *ps_l1_mv = &ps_out_mv->as_mv[0][L1];
+
+        ps_l1_mv->s_mv.i2_mvx =
+            (ps_l1_mv->s_mv.i2_mvx * ps_layer_state->s_mv_scale.i4_abscissa + 32768) >> 16;
+        ps_l1_mv->s_mv.i2_mvy =
+            (ps_l1_mv->s_mv.i2_mvy * ps_layer_state->s_mv_scale.i4_ordinate + 32768) >> 16;
+    }
+
+    if(ps_out_mv->ae_pred_mode[0] != L1)
+    {
+        isvce_enc_pu_mv_t *ps_l0_mv = &ps_out_mv->as_mv[0][L0];
+
+        ps_l0_mv->s_mv.i2_mvx =
+            (ps_l0_mv->s_mv.i2_mvx * ps_layer_state->s_mv_scale.i4_abscissa + 32768) >> 16;
+        ps_l0_mv->s_mv.i2_mvy =
+            (ps_l0_mv->s_mv.i2_mvy * ps_layer_state->s_mv_scale.i4_ordinate + 32768) >> 16;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Chooses, per reference list, between the spatial MVP and the ILP MVP
+*
+* @description
+*  'au1_mvp_idx' of a list is set to 1 only when the MVD cost against the ILP
+*  MVP is strictly lower than the cost against the spatial MVP, and 0
+*  otherwise. A predictor whose 'i1_ref_idx' is -1 is never chosen; the ILP MVP
+*  additionally requires its 'i1_ref_idx' to match that of the MB's MV. Both
+*  indices are 0 when USE_ILP_MV_AS_MVP is off, when no ILP MVP is supplied, or
+*  for intra, PSKIP, BSKIP and BASE_MODE MBs.
+*
+* @param[in] ps_mb_info
+*  MB info; 'as_pu->au1_mvp_idx' is updated
+*
+* @param[in] ps_spatial_mvp
+*  Spatial MVPs, indexed by reference list
+*
+* @param[in] ps_ilp_mvp
+*  ILP MVPs, indexed by reference list; may be NULL
+*
+* @param[in] pu1_mvd_costs
+*  Cost table indexed by MV component difference, which can be negative
+*
+*******************************************************************************
+*/
+void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
+                        isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs)
+{
+    WORD32 i4_dir;
+
+    if(!(USE_ILP_MV_AS_MVP) || !ps_ilp_mvp || ps_mb_info->u1_is_intra ||
+       (PSKIP == ps_mb_info->u2_mb_type) || (BSKIP == ps_mb_info->u2_mb_type) ||
+       (BASE_MODE == ps_mb_info->u2_mb_type))
+    {
+        ps_mb_info->as_pu->au1_mvp_idx[L0] = 0;
+        ps_mb_info->as_pu->au1_mvp_idx[L1] = 0;
+
+        return;
+    }
+
+    for(i4_dir = 0; i4_dir < NUM_PRED_DIRS; i4_dir++)
+    {
+        const PRED_MODE_T e_dir = (PRED_MODE_T) i4_dir;
+        /* List evaluated in this iteration; it is in use unless pred mode is e_dir */
+        const PRED_MODE_T e_list = (L0 == e_dir) ? L1 : L0;
+
+        if(ps_mb_info->as_pu->u1_pred_mode == e_dir)
+        {
+            ps_mb_info->as_pu->au1_mvp_idx[e_list] = 0;
+        }
+        else
+        {
+            /* Index 0 is the spatial MVP, index 1 is the ILP MVP */
+            isvce_enc_pu_mv_t *aps_mvp_cands[2] = {&ps_spatial_mvp[e_list], &ps_ilp_mvp[e_list]};
+            isvce_enc_pu_mv_t *ps_cur_mv = &ps_mb_info->as_pu->as_me_info[e_list];
+
+            WORD32 ai4_costs[2];
+            WORD32 i4_cand;
+
+            for(i4_cand = 0; i4_cand < 2; i4_cand++)
+            {
+                isvce_enc_pu_mv_t *ps_mvp = aps_mvp_cands[i4_cand];
+
+                bool b_usable = (ps_mvp->i1_ref_idx != -1) &&
+                                ((0 == i4_cand) || (ps_cur_mv->i1_ref_idx == ps_mvp->i1_ref_idx));
+
+                if(b_usable)
+                {
+                    ai4_costs[i4_cand] =
+                        pu1_mvd_costs[ps_cur_mv->s_mv.i2_mvx - ps_mvp->s_mv.i2_mvx] +
+                        pu1_mvd_costs[ps_cur_mv->s_mv.i2_mvy - ps_mvp->s_mv.i2_mvy];
+                }
+                else
+                {
+                    ai4_costs[i4_cand] = INT32_MAX;
+                }
+            }
+
+            ps_mb_info->as_pu->au1_mvp_idx[e_list] = ai4_costs[0] > ai4_costs[1];
+        }
+    }
+}
--- a/encoder/svc/isvce_ilp_mv.h
+++ b/encoder/svc/isvce_ilp_mv.h
@ -0,0 +1,115 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ilp_mv.h
+*
+* @brief
+*  Contains structure definitions and declarations of the functions defined
+*  in isvce_ilp_mv.c
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ILP_MV_H_
+#define _ISVCE_ILP_MV_H_
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "isvc_macros.h"
+#include "ih264_debug.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_defs.h"
+#include "isvce_pred_structs.h"
+#include "isvce_structs.h"
+#include "isvce_structs.h"
+#include "isvce_utils.h"
+
+/* Structs */
+typedef struct ilp_mv_constants_t
+{
+    /* Opaque state handle. NOTE(review): presumably points to the */
+    /* ilp_mv_state_t declared in isvce_ilp_mv_private_defs.h - confirm */
+    void *pv_state;
+} ilp_mv_constants_t;
+
+/* Result members of the ILP MV context (see svc_ilp_mv_ctxt_t) */
+typedef struct ilp_mv_outputs_t
+{
+    /* Same type as the ILP MV argument of isvce_is_ilp_mv_winning_mv() */
+    ilp_mv_t s_ilp_mv;
+
+    /* Same type as the candidate list argument of isvce_check_max_mv_diff_lt_4() */
+    ilp_me_cands_t s_ilp_me_cands;
+
+} ilp_mv_outputs_t;
+
+/* NOTE(review): presumably the per-call inputs that the caller fills in */
+/* before invoking isvce_get_mb_ilp_mv() - confirm against isvce_ilp_mv.c */
+typedef struct ilp_mv_variables_t
+{
+    svc_ilp_data_t *ps_svc_ilp_data;
+
+    /* MB position; units (MBs vs pixels) are not visible here - TODO confirm */
+    coordinates_t s_mb_pos;
+
+    UWORD8 u1_spatial_layer_id;
+} ilp_mv_variables_t;
+
+/* ILP MV context passed to isvce_get_mb_ilp_mv(); groups the constants, */
+/* variables and outputs structs by value */
+typedef struct svc_ilp_mv_ctxt_t
+{
+    ilp_mv_constants_t s_ilp_mv_constants;
+
+    ilp_mv_variables_t s_ilp_mv_variables;
+
+    ilp_mv_outputs_t s_ilp_mv_outputs;
+
+} svc_ilp_mv_ctxt_t;
+
+/* Function declarations */
+extern UWORD32 isvce_get_ilp_mv_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
+                                          UWORD32 u4_wd, UWORD32 u4_ht);
+
+extern void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
+
+extern void isvce_get_mb_ilp_mv(svc_ilp_mv_ctxt_t *ps_ilp_mv_ctxt);
+
+/* Writes ps_mb_info->as_pu->au1_mvp_idx[] for L0 and L1. An entry is 1 only */
+/* when the ILP MVP gives a strictly lower MVD cost than the spatial MVP, and */
+/* 0 otherwise. ps_ilp_mvp may be NULL, in which case both entries are 0 */
+extern void isvce_mvp_idx_eval(isvce_mb_info_t *ps_mb_info, isvce_enc_pu_mv_t *ps_spatial_mvp,
+                               isvce_enc_pu_mv_t *ps_ilp_mvp, UWORD8 *pu1_mvd_costs);
+
+/* Returns the result of isvce_check_identical_mv() on the MB's ME info and */
+/* the first ILP MV when ILP MV is enabled, ps_ilp_mv is non-NULL, the MB is */
+/* not PSKIP/BSKIP and both MB type and pred mode match the ILP MV. Returns 0 */
+/* in every other case */
+static FORCEINLINE UWORD8 isvce_is_ilp_mv_winning_mv(isvce_mb_info_t *ps_mb_info,
+                                                     ilp_mv_t *ps_ilp_mv)
+{
+    if(!(ENABLE_ILP_MV) || !ps_ilp_mv)
+    {
+        return 0;
+    }
+
+    if((ps_mb_info->u2_mb_type == PSKIP) || (ps_mb_info->u2_mb_type == BSKIP))
+    {
+        return 0;
+    }
+
+    if(ps_mb_info->u2_mb_type != ps_ilp_mv->e_mb_type)
+    {
+        return 0;
+    }
+
+    if(((PRED_MODE_T) ps_mb_info->as_pu->u1_pred_mode) != ps_ilp_mv->ae_pred_mode[0])
+    {
+        return 0;
+    }
+
+    return isvce_check_identical_mv(ps_mb_info->as_pu->as_me_info, ps_ilp_mv->as_mv[0],
+                                    ps_ilp_mv->ae_pred_mode[0]);
+}
+
+#endif
--- a/encoder/svc/isvce_ilp_mv_private_defs.h
+++ b/encoder/svc/isvce_ilp_mv_private_defs.h
@ -0,0 +1,68 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_ilp_mv_private_defs.h
+*
+* @brief
+*  Contains datatype and macro definitions used exclusively in
+*  ILP MV derivations
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_ILP_MV_PRIVATE_DEFS_H_
+#define _ISVCE_ILP_MV_PRIVATE_DEFS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+
+/* Structs */
+/* Offsets, etc used for resLayer MV upsampling */
+/* Derived as per 'G.8.6.1.1' for all MB's once during init */
+typedef struct ilp_mv_mb_state_t
+{
+    /* One entry per [MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW] slot of the MB. */
+    /* NOTE(review): presumably the PU position inside the reference layer */
+    /* MB that each slot maps to - confirm against isvce_ilp_mv.c */
+    coordinates_t as_pu_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
+
+    /* Same indexing as as_pu_positions. NOTE(review): presumably the */
+    /* reference layer MB position for each slot - confirm */
+    coordinates_t as_mb_positions[MAX_PU_IN_MB_COL][MAX_PU_IN_MB_ROW];
+} ilp_mv_mb_state_t;
+
+/* State kept for one spatial layer (see ilp_mv_state_t::ps_layer_state) */
+typedef struct ilp_mv_layer_state_t
+{
+    layer_resampler_props_t *ps_props;
+
+    /* NOTE(review): presumably an array with one ilp_mv_mb_state_t per MB */
+    /* of the layer; the length is not visible here - confirm */
+    ilp_mv_mb_state_t *ps_mb_states;
+
+    /* NOTE(review): presumably the x/y scale applied to reference layer MVs - confirm */
+    coordinates_t s_mv_scale;
+
+} ilp_mv_layer_state_t;
+
+/* Top-level private state of the ILP MV module */
+typedef struct ilp_mv_state_t
+{
+    /* Array of size numSpatialLayers; one ilp_mv_layer_state_t per layer */
+    ilp_mv_layer_state_t *ps_layer_state;
+
+} ilp_mv_state_t;
+
+#endif
--- a/encoder/svc/isvce_ilp_mv_utils.h
+++ b/encoder/svc/isvce_ilp_mv_utils.h
@ -0,0 +1,111 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+/**
+*******************************************************************************
+* @file
+*  isvce_ilp_mv_utils.h
+*
+* @brief
+*  Defs to perform experiments in ilp mv
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _ISVCE_ILP_MV_UTILS_H_
+#define _ISVCE_ILP_MV_UTILS_H_
+
+#include <stdbool.h>
+
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_macros.h"
+#include "isvce_pred_structs.h"
+#include "isvce_structs.h"
+
+/* NOTE(review): going by the names, these are presumably the candidate */
+/* limits used when fewer than two / at least two ILP MVs exist - confirm */
+#define MAX_CAND_IF_NUM_ILP_MV_LT_2 8
+#define MAX_CAND_IF_NUM_ILP_MV_GTEQ_2 6
+
+/* nbr_mb.x, nbr_mb.y, pu_pos.x, pu_pos.y */
+#define NBR_PU_AND_MB_POS 4
+
+/* One row per neighbouring region; the columns follow the order listed */
+/* above NBR_PU_AND_MB_POS. NOTE(review): the MB offsets look like the */
+/* left, top, right and bottom neighbours - confirm the sign convention */
+static const WORD8 gai1_nbr_ilp_mv_map[MAX_ILP_MV_IN_NBR_RGN][NBR_PU_AND_MB_POS] = {
+    {-1, 0, 3, 0},
+    {0, -1, 0, 3},
+    {1, 0, 0, 0},
+    {0, 1, 0, 0},
+};
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Checks whether every pair of ILP MV candidates is usable for the given
+* reference list and differs by less than four in both the x and y components
+*
+* @param[in] ps_ilp_me_cands
+*  Pointer to ilp_me_cands
+*
+* @param[in] i4_reflist
+*  Reference list whose MVs are compared. It is also used as the PRED_MODE_T
+* that a candidate's pred mode has to match; BI candidates match either list
+*
+* @returns  false if any compared pair contains a candidate whose pred mode is
+* neither i4_reflist nor BI, or if any pair differs by four or more in x or y.
+* true otherwise, which includes the case of fewer than two ILP MVs since no
+* pair is compared then
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static FORCEINLINE bool isvce_check_max_mv_diff_lt_4(ilp_me_cands_t *ps_ilp_me_cands,
+                                                     WORD32 i4_reflist)
+{
+    UWORD32 u4_cur, u4_prev;
+    UWORD32 u4_abs_dx, u4_abs_dy;
+
+    for(u4_cur = 1; u4_cur < ps_ilp_me_cands->u4_num_ilp_mvs; u4_cur++)
+    {
+        for(u4_prev = 0; u4_prev < u4_cur; u4_prev++)
+        {
+            /* Both candidates of the pair must carry an MV for this list */
+            if(!((ps_ilp_me_cands->ae_pred_mode[u4_cur] == ((PRED_MODE_T) i4_reflist)) ||
+                 (ps_ilp_me_cands->ae_pred_mode[u4_cur] == BI)))
+            {
+                return false;
+            }
+
+            if(!((ps_ilp_me_cands->ae_pred_mode[u4_prev] == ((PRED_MODE_T) i4_reflist)) ||
+                 (ps_ilp_me_cands->ae_pred_mode[u4_prev] == BI)))
+            {
+                return false;
+            }
+
+            u4_abs_dx = ABS(ps_ilp_me_cands->as_mv[u4_cur][i4_reflist].s_mv.i2_mvx -
+                            ps_ilp_me_cands->as_mv[u4_prev][i4_reflist].s_mv.i2_mvx);
+
+            u4_abs_dy = ABS(ps_ilp_me_cands->as_mv[u4_cur][i4_reflist].s_mv.i2_mvy -
+                            ps_ilp_me_cands->as_mv[u4_prev][i4_reflist].s_mv.i2_mvy);
+
+            if((u4_abs_dx >= 4) || (u4_abs_dy >= 4))
+            {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+#endif
--- a/encoder/svc/isvce_interface_structs.h
+++ b/encoder/svc/isvce_interface_structs.h
@ -0,0 +1,116 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_interface_structs.h
+*
+* @brief
+*  Contains struct definition used for interface objects such as input,
+*  output, and rec
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_INTERFACE_STRUCTS_H_
+#define _ISVCE_INTERFACE_STRUCTS_H_
+
+#include "isvc_structs.h"
+
+/* Raw input buffer together with its timestamp and optional side information */
+typedef struct isvce_raw_inp_buf_t
+{
+    /** Descriptor of raw buffer                                     */
+    iv_raw_buf_t s_raw_buf;
+
+    /** Lower 32bits of time stamp corresponding to the above buffer */
+    UWORD32 u4_timestamp_low;
+
+    /** Upper 32bits of time stamp corresponding to the above buffer */
+    UWORD32 u4_timestamp_high;
+
+    /** Flag to indicate if the current buffer is last buffer */
+    UWORD32 u4_is_last;
+
+    /** Flag to indicate if mb info is sent along with input buffer     */
+    UWORD32 u4_mb_info_type;
+
+    /** Size of the mb info structure                                   */
+    UWORD32 u4_mb_info_size;
+
+    /** Buffer containing mb info if u4_mb_info_type is non-zero        */
+    void *pv_mb_info;
+
+    /** Flag to indicate if pic info is sent along with input buffer     */
+    UWORD32 u4_pic_info_type;
+
+    /** Buffer containing pic info. NOTE(review): presumably valid when  */
+    /** u4_pic_info_type is non-zero; the earlier comment named the mb   */
+    /** info type here - confirm                                         */
+    void *pv_pic_info;
+
+    /** SEI CCV params flag                                              */
+    UWORD8 u1_sei_ccv_params_present_flag;
+
+    /** SEI CCV params info                                              */
+    sei_ccv_params_t s_sei_ccv;
+
+} isvce_raw_inp_buf_t;
+
+/* Output bitstream buffers together with the timestamp they belong to */
+typedef struct
+{
+    /** Descriptors of bitstream buffers; array of MAX_NUM_SPATIAL_LAYERS entries */
+    iv_bits_buf_t as_bits_buf[MAX_NUM_SPATIAL_LAYERS];
+
+    /** Lower 32bits of time stamp corresponding to the above buffers */
+    UWORD32 u4_timestamp_low;
+
+    /** Upper 32bits of time stamp corresponding to the above buffers */
+    UWORD32 u4_timestamp_high;
+
+    /** Flag to indicate if the current buffer is last buffer */
+    UWORD32 u4_is_last;
+
+} isvce_out_buf_t;
+
+/* Picture buffer (the file header refers to it as "rec") with its timestamp */
+/* and picture count */
+typedef struct
+{
+    /** Descriptor of picture buffer                                     */
+    svc_au_buf_t s_pic_buf;
+
+    /** Lower 32bits of time stamp corresponding to the above buffer */
+    UWORD32 u4_timestamp_low;
+
+    /** Upper 32bits of time stamp corresponding to the above buffer */
+    UWORD32 u4_timestamp_high;
+
+    /** Flag to indicate if the current buffer is last buffer */
+    UWORD32 u4_is_last;
+
+    /** Picture count corresponding to current picture */
+    WORD32 i4_pic_cnt;
+
+} isvce_rec_buf_t;
+
+#endif
--- a/encoder/svc/isvce_intra_modes_eval.c
+++ b/encoder/svc/isvce_intra_modes_eval.c
--- a/encoder/svc/isvce_intra_modes_eval.h
+++ b/encoder/svc/isvce_intra_modes_eval.h
@ -0,0 +1,361 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_intra_modes_eval.h
+*
+* @brief
+*  This file contains declarations of routines that perform rate distortion
+*  analysis on a macroblock if coded as intra.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_INTRA_MODES_EVAL_H_
+#define _ISVCE_INTRA_MODES_EVAL_H_
+
+/**
+******************************************************************************
+*
+* @brief
+*  derivation process for subblock/partition availability
+*
+* @par   Description
+*  Calculates the availability of the left, top, topright and topleft subblock
+*  or partitions.
+*
+* @param[in]    s_ngbr_avbl
+*  pointer to the neighbor availability of the current macroblock
+*
+* @param[in]    i1_pel_pos_x
+*  column position of the pel wrt the current block
+*
+* @param[in]    i1_pel_pos_y
+*  row position of the pel wrt the current block
+*
+* @remarks     Assumptions: before calling this function it is assumed that
+*   the neighbor availability of the current macroblock is already derived.
+*   Based on table 6-3 of H264 specification
+*
+* @return      availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *s_ngbr_avbl, WORD8 i1_pel_pos_x,
+                                               WORD8 i1_pel_pos_y);
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 16x16 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]   ps_proc_ctxt
+*  pointer to process context (handle)
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+*  the SAD and cost are one and the same.
+*
+* @return     none
+*
+******************************************************************************
+*/
+void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 8x8 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: TODO: This function needs to be tested
+*
+*  @return      none
+*
+******************************************************************************
+*/
+void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction. This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc_ctxt);
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction. This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc_ctxt);
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible chroma intra 8x8 modes and finds
+*  the mode that best represents the macroblock (least distortion) and occupies
+*  fewer bits in the bitstream.
+*
+* @param[in] ps_proc_ctxt
+*  pointer to macroblock context (handle)
+*
+* @remarks
+*  For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none
+*
+******************************************************************************
+*/
+void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(
+    isvce_process_ctxt_t *ps_proc_ctxt);
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+*  prediction.
+*
+* @par Description
+*  This function evaluates first three 16x16 modes and compute corresponding sad
+*  and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  Pointer to the variable in which minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+*  Says what all modes are valid
+*
+* @returns      none
+*
+******************************************************************************
+*/
+typedef void isvce_evaluate_intra_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16,
+                                           UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd,
+                                           WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+                                           WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes);
+
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes;
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes;
+
+/* assembly */
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_a9q;
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_a9q;
+
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_av8;
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_av8;
+
+/* x86 intrinsics */
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra16x16_modes_ssse3;
+isvce_evaluate_intra_modes_ft isvce_evaluate_intra_chroma_modes_ssse3;
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra 4x4 mode and perform prediction.
+*
+* @par Description
+*  This function evaluates  4x4 modes and compute corresponding sad
+*  and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+*  Says what all modes are valid
+*
+* @param[in] u4_lambda
+*  Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+*  Predicted mode for cost computation
+*
+* @returns      none
+*
+******************************************************************************
+*/
+typedef void isvce_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels,
+                                               UWORD8 *pu1_dst, UWORD32 src_strd, UWORD32 dst_strd,
+                                               WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+                                               WORD32 *pu4_sadmin, UWORD32 u4_valid_intra_modes,
+                                               UWORD32 u4_lambda, UWORD32 u4_predictd_mode);
+
+isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes;
+
+/* x86 intrinsics */
+isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_ssse3;
+
+/* assembly */
+isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_a9q;
+isvce_evaluate_intra_4x4_modes_ft isvce_evaluate_intra_4x4_modes_av8;
+
+#endif
--- a/encoder/svc/isvce_mc.c
+++ b/encoder/svc/isvce_mc.c
@ -0,0 +1,480 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+ *******************************************************************************
+ * @file
+ *  isvce_mc.c
+ *
+ * @brief
+ *  Contains definition of functions for motion compensation
+ *
+ * @author
+ *  ittiam
+ *
+ * @par List of Functions:
+ *  - isvce_motion_comp_luma()
+ *  - isvce_motion_comp_chroma()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_debug.h"
+#include "isvc_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "isvc_structs.h"
+#include "isvc_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "isvc_cabac_tables.h"
+#include "isvce_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "isvce_mc.h"
+#include "ih264e_half_pel.h"
+#include "isvce_ibl_eval.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ *
+ * @brief
+ *  performs motion compensation for a luma mb for the given mv.
+ *
+ * @par Description
+ *  For a base mode intra mb, no motion compensation is done; ps_pred is set to
+ *  the Y component of the pred buffer held by ps_proc->ps_intra_pred_ctxt.
+ *  Otherwise, for every partition, the prediction source is either the full
+ *  pel reference moved by the integer part of the mv, or
+ *  ps_proc->pu1_best_subpel_buf when the mv has a half pel component or the
+ *  partition is bi-predicted.
+ *  When the mb has a single partition, there is no need to copy the 16x16 unit
+ *  to the pred buffer: the pointer and stride of the source are returned in
+ *  ps_pred and used in place of the pred buffer elsewhere. When the mb has
+ *  several partitions, each partition is copied to its position in
+ *  ps_proc->pu1_pred_mb and ps_pred is set to that buffer.
+ *
+ * @param[in] ps_proc
+ *  pointer to current proc ctxt
+ *
+ * @param[out] ps_pred
+ *  receives the pointer and stride of the buffer holding the luma prediction.
+ *  It is left untouched when the mb has no partitions.
+ *
+ * @return  none
+ *
+ * @remarks Assumes ps_proc->pu1_best_subpel_buf is populated before the call.
+ *  The quarter pel bits of the mv do not take part in the source selection.
+ *
+ ******************************************************************************
+ */
+void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
+{
+    /* ISA specific inter prediction routines */
+    inter_pred_fxns_t *ps_inter_pred_fxns =
+        &ps_proc->ps_codec->s_isa_dependent_fxns.s_inter_pred_fxns;
+
+    /* pred buffer stride */
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* partition idx */
+    UWORD32 u4_num_prtn;
+
+    if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
+    {
+        svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
+
+        ps_pred->pv_data =
+            (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y]
+                            .pv_data);
+        ps_pred->i4_data_stride =
+            ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride;
+
+        return;
+    }
+
+    for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+    {
+        /* motion vectors, size and position of the current partition */
+        isvce_enc_pu_t *ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
+
+        /* Only PRED_L1 reads list 1. PRED_L0, PRED_BI and any other mode read */
+        /* list 0; for PRED_BI this is a dummy as the sub pel buffer is used   */
+        WORD32 i4_ref_list = (PRED_L1 == ps_curr_pu->u1_pred_mode) ? 1 : 0;
+        WORD32 i4_bipred_flag = (PRED_BI == ps_curr_pu->u1_pred_mode);
+
+        mv_t *ps_curr_mv = &ps_curr_pu->as_me_info[i4_ref_list].s_mv;
+
+        /* full pel part of the mv (full pel units) */
+        WORD32 i4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
+        WORD32 i4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
+
+        /* non zero when the half pel bit of either mv component is set */
+        WORD32 i4_is_hpel = ((ps_curr_mv->i2_mvx | ps_curr_mv->i2_mvy) & 0x2) != 0;
+
+        /* [0] : full pel reference, [1] : best sub pel / bipred buffer */
+        UWORD8 *apu1_ref[2];
+        WORD32 ai4_ref_strd[2];
+
+        /* index of the source to use; non zero for half pel and bipred */
+        UWORD32 u4_ref_idx;
+
+        /* Pointer and stride are read from the same list, so that a PRED_L1 */
+        /* partition is not stepped through with the stride of list 0        */
+        apu1_ref[0] = ps_proc->as_ref_buf_props[i4_ref_list].as_component_bufs[0].pv_data;
+        ai4_ref_strd[0] =
+            ps_proc->as_ref_buf_props[i4_ref_list].as_component_bufs[0].i4_data_stride;
+
+        /* Move ref to position given by MV */
+        apu1_ref[0] += (i4_mv_y_full * ai4_ref_strd[0]) + i4_mv_x_full;
+
+        /* Sub pel / bipred source */
+        apu1_ref[1] = ps_proc->pu1_best_subpel_buf;
+        ai4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
+
+        u4_ref_idx = (i4_is_hpel || i4_bipred_flag) ? 1 : 0;
+
+        /********************************************************************/
+        /* if the mb has a single partition, there is no need to copy the   */
+        /* 16x16 unit from the source to the pred buffer. We might as well  */
+        /* send the source pointer as pred buffer (ofc with updated stride) */
+        /* to fwd transform and inverse transform unit.                     */
+        /********************************************************************/
+        if(ps_proc->u4_num_sub_partitions == 1)
+        {
+            ps_pred->pv_data = apu1_ref[u4_ref_idx];
+            ps_pred->i4_data_stride = ai4_ref_strd[u4_ref_idx];
+        }
+        /*
+         * Copying half pel or full pel to prediction buffer
+         * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only
+         * support 16x16 in P mbs
+         */
+        else
+        {
+            /* width and height of partition */
+            UWORD32 wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 2;
+            UWORD32 ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 2;
+
+            /* position of the partition inside the pred buffer */
+            UWORD8 *pu1_pred = ps_proc->pu1_pred_mb +
+                               4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
+                               4 * ps_curr_pu->u1_pos_x_in_4x4;
+
+            ps_inter_pred_fxns->pf_inter_pred_luma_copy(apu1_ref[u4_ref_idx], pu1_pred,
+                                                        ai4_ref_strd[u4_ref_idx], i4_pred_strd,
+                                                        ht, wd, NULL, 0);
+
+            /* the prediction now lives in the pred buffer; report it to the caller */
+            ps_pred->pv_data = ps_proc->pu1_pred_mb;
+            ps_pred->i4_data_stride = i4_pred_strd;
+        }
+    }
+}
+
+/**
+ ******************************************************************************
+ *
+ * @brief
+ *  chroma inter prediction of one partition from one reference list
+ *
+ * @par   Description
+ *  Moves the chroma reference of the given list by the integer part of the
+ *  partition's mv and calls pf_inter_pred_chroma with the fractional part
+ *  (dx, dy = lower three bits of the mv components) to write the prediction
+ *  to pu1_dst.
+ *
+ * @param[in] ps_proc
+ *  pointer to current proc ctxt
+ *
+ * @param[in] ps_curr_pu
+ *  partition whose mv and size are used
+ *
+ * @param[in] i4_ref_list
+ *  index into as_me_info[] and as_ref_buf_props[]
+ *
+ * @param[out] pu1_dst
+ *  buffer that receives the prediction
+ *
+ * @param[in] i4_dst_strd
+ *  stride of pu1_dst
+ *
+ * @return  none
+ *
+ ******************************************************************************
+ */
+static void isvce_mc_chroma_one_list(isvce_process_ctxt_t *ps_proc, isvce_enc_pu_t *ps_curr_pu,
+                                     WORD32 i4_ref_list, UWORD8 *pu1_dst, WORD32 i4_dst_strd)
+{
+    inter_pred_fxns_t *ps_inter_pred_fxns =
+        &ps_proc->ps_codec->s_isa_dependent_fxns.s_inter_pred_fxns;
+
+    mv_t *ps_curr_mv = &ps_curr_pu->as_me_info[i4_ref_list].s_mv;
+
+    /* chroma reference buffer of the list and its stride */
+    UWORD8 *pu1_ref = ps_proc->as_ref_buf_props[i4_ref_list].as_component_bufs[1].pv_data;
+    WORD32 i4_ref_strd =
+        ps_proc->as_ref_buf_props[i4_ref_list].as_component_bufs[1].i4_data_stride;
+
+    /* integer part of the mv in chroma samples */
+    WORD32 i4_mv_x = ps_curr_mv->i2_mvx >> 3;
+    WORD32 i4_mv_y = ps_curr_mv->i2_mvy >> 3;
+
+    /* Fractional part of the mv. Bit 2 corresponds to full pel in luma, bit 1 */
+    /* to half pel and bit 0 to quarter pel                                    */
+    UWORD8 u1_dx = ps_curr_mv->i2_mvx & 0x7;
+    UWORD8 u1_dy = ps_curr_mv->i2_mvy & 0x7;
+
+    /* width and height of sub macro block */
+    UWORD32 wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
+    UWORD32 ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
+
+    /* Move the pointer so that it points to the motion compensated location.  */
+    /* Each horizontal step is two bytes, presumably because U and V samples   */
+    /* are interleaved in this buffer (confirm)                                */
+    pu1_ref += (i4_mv_y * i4_ref_strd) + (i4_mv_x * 2);
+
+    /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
+     * separate functions for better performance
+     *
+     * isvc_inter_pred_chroma_dx_zero_a9q
+     * and
+     * isvc_inter_pred_chroma_dy_zero_a9q
+     */
+    ps_inter_pred_fxns->pf_inter_pred_chroma(pu1_ref, pu1_dst, i4_ref_strd, i4_dst_strd, u1_dx,
+                                             u1_dy, ht, wd);
+}
+
+/**
+ ******************************************************************************
+ *
+ * @brief
+ *  performs motion compensation for chroma mb
+ *
+ * @par   Description
+ *  For a base mode intra mb, no motion compensation is done; ps_pred is set to
+ *  the UV component of the pred buffer held by ps_proc->ps_intra_pred_ctxt.
+ *  Otherwise ps_pred is set to ps_proc->pu1_pred_mb and every partition is
+ *  predicted into that buffer. Partitions that are not bi-predicted are
+ *  interpolated from the reference list given by their pred mode. Bi-predicted
+ *  partitions are interpolated from both lists into
+ *  ps_proc->apu1_subpel_buffs[] and the two results are averaged.
+ *
+ * @param[in] ps_proc
+ *  pointer to current proc ctxt
+ *
+ * @param[out] ps_pred
+ *  receives the pointer and stride of the buffer holding the chroma prediction
+ *
+ * @return  none
+ *
+ * @remarks The averaging step always covers MB_SIZE x (MB_SIZE >> 1) bytes,
+ *  independent of the partition size.
+ ******************************************************************************
+ */
+void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
+{
+    /* ISA specific inter prediction routines */
+    inter_pred_fxns_t *ps_inter_pred_fxns =
+        &ps_proc->ps_codec->s_isa_dependent_fxns.s_inter_pred_fxns;
+
+    /* pred buffer stride */
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* partition idx */
+    UWORD32 u4_num_prtn;
+
+    ASSERT(ps_proc->u4_num_sub_partitions <= ENC_MAX_PU_IN_MB);
+
+    if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
+    {
+        svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
+
+        ps_pred->pv_data =
+            (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
+                            .pv_data);
+        ps_pred->i4_data_stride =
+            ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
+                .i4_data_stride;
+
+        return;
+    }
+
+    ps_pred->pv_data = ps_proc->pu1_pred_mb;
+    ps_pred->i4_data_stride = ps_proc->i4_pred_strd;
+
+    for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+    {
+        /* motion vectors, size and position of the current partition */
+        isvce_enc_pu_t *ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
+
+        /* Position of the partition inside the pred buffer.                   */
+        /* NOTE(review): the offset is 4 rows and 2 bytes per 4x4 unit. With   */
+        /* chroma rows at half the luma height and two bytes per chroma        */
+        /* position, 2 rows and 4 bytes would be expected. It makes no         */
+        /* difference while partitions start at (0, 0); confirm before         */
+        /* enabling smaller partitions.                                        */
+        UWORD8 *pu1_pred = ps_proc->pu1_pred_mb +
+                           4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
+                           2 * ps_curr_pu->u1_pos_x_in_4x4;
+
+        if(ps_curr_pu->u1_pred_mode != BI)
+        {
+            /* the pred mode doubles as the reference list index */
+            isvce_mc_chroma_one_list(ps_proc, ps_curr_pu, ps_curr_pu->u1_pred_mode, pu1_pred,
+                                     i4_pred_strd);
+        }
+        else
+        {
+            /*
+             * We need to interpolate the L0 and L1 ref pics with the chroma MV
+             * then use them to average for bilinear interpred
+             */
+            WORD32 i4_ref_list;
+
+            /* Temporary buffers to store the interpolated value from L0 and L1 */
+            UWORD8 *apu1_ref_buf[2];
+
+            apu1_ref_buf[L0] = ps_proc->apu1_subpel_buffs[0];
+            apu1_ref_buf[L1] = ps_proc->apu1_subpel_buffs[1];
+
+            for(i4_ref_list = 0; i4_ref_list < BI; i4_ref_list++)
+            {
+                isvce_mc_chroma_one_list(ps_proc, ps_curr_pu, i4_ref_list,
+                                         apu1_ref_buf[i4_ref_list], MB_SIZE);
+            }
+
+            ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(apu1_ref_buf[L0], apu1_ref_buf[L1],
+                                                            pu1_pred, MB_SIZE, MB_SIZE,
+                                                            i4_pred_strd, MB_SIZE >> 1, MB_SIZE);
+        }
+    }
+}
--- a/encoder/svc/isvce_mc.h
+++ b/encoder/svc/isvce_mc.h
@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_mc.h
+*
+* @brief
+*  This file contains declarations of routines that perform motion compensation
+*  of luma and chroma macroblocks.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+#ifndef _ISVCE_MC_H_
+#define _ISVCE_MC_H_
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for a luma mb for the given mv.
+*
+* @par Description
+*  This routine performs motion compensation of an inter mb. When the inter
+*  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
+*  to pred buffer. In this case the function returns pointer and stride of the
+*  ref. buffer and this info is used in place of pred buffer else where.
+*  In other cases, the pred buffer is populated via copy / filtering + copy
+*  (q pel cases) and returned.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
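+* @param[out] ps_pred
+*  receives the pointer and stride of the buffer holding the luma prediction
+*
+* @return  none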
+*
+* @remarks Assumes half pel buffers for the entire frame are populated.
+*
+******************************************************************************
+*/
+extern void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for chroma mb
+*
+* @par   Description
+*  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+*  according to the motion vectors given
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
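+* @param[out] ps_pred
+*  receives the pointer and stride of the buffer holding the chroma prediction
+*
+* @return  none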
+*
+* @remarks Assumes half pel and quarter pel buffers for the entire frame are
+*  populated.
+******************************************************************************
+*/
+extern void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred);
+
+#endif
--- a/encoder/svc/isvce_me.c
+++ b/encoder/svc/isvce_me.c
--- a/encoder/svc/isvce_me.h
+++ b/encoder/svc/isvce_me.h
@ -0,0 +1,381 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+ *******************************************************************************
+ * @file
+ *  isvce_me.h
+ *
+ * @brief
+ *  Contains macros and declarations of the motion estimation routines of the
+ *  SVC encoder
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef _ISVCE_ME_H_
+#define _ISVCE_ME_H_
+
+#include "ih264_typedefs.h"
+
+#include "isvce_structs.h"
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*  @brief     Skip bias value for P slices (currently 0).
+*             NOTE(review): the code that applies this bias is not visible in
+*             this header; confirm how it enters the skip decision.
+******************************************************************************
+*/
+#define SKIP_BIAS_P 0
+
+/**
+******************************************************************************
+*  @brief     Skip bias value for B slices (currently 0).
+*             NOTE(review): the code that applies this bias is not visible in
+*             this header; confirm how it enters the skip decision.
+******************************************************************************
+*/
+#define SKIP_BIAS_B 0
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ *  @brief      compute median of 3 elements (a, b, c) and store the output
+ *  in to result. This is used for mv prediction
+ *
+ *  @remarks    Every argument is parenthesized in the expansion, so that
+ *  expressions such as (x & 3) can be passed safely. The macro expands to a
+ *  bare if / else statement and evaluates a, b and c more than once: do not
+ *  pass arguments with side effects and do not use it as the unbraced body of
+ *  an outer if that has an else.
+ ******************************************************************************
+ */
+
+#define MEDIAN(a, b, c, result) \
+    if((a) > (b))               \
+    {                           \
+        if((b) > (c))           \
+            (result) = (b);     \
+        else                    \
+        {                       \
+            if((a) > (c))       \
+                (result) = (c); \
+            else                \
+                (result) = (a); \
+        }                       \
+    }                           \
+    else                        \
+    {                           \
+        if((c) > (b))           \
+            (result) = (b);     \
+        else                    \
+        {                       \
+            if((c) > (a))       \
+                (result) = (c); \
+            else                \
+                (result) = (a); \
+        }                       \
+    }
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function populates the length of the codewords for motion vectors in
+ *  the range (-search range, search range) in pixels
+ *
+ * @param[in] ps_me
+ *  Pointer to me ctxt
+ *
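+ * @remarks The lengths of the code words are derived from signed exponential
+ * Golomb codes. The prototype has no pu1_mv_bits argument; the table holding
+ * the length of the codeword for all mv's is presumably reached through ps_me
+ * (confirm against the definition).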
+ *
+ *******************************************************************************
+ */
+void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me);
+
+/**
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a P skip MB
+ *
+ * @par Description:
+ *  The function computes the parameters for a P skip MB
+ *
+ * @param[in] ps_proc
+ *  Process context
+ *
+ * @param[in] u4_for_me
+ *  Flag to indicate the purpose of computing skip
+ *
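+ * @param[out] ps_pred_mv
+ *  Flag to indicate the current active reference list
+ *  (NOTE(review): this text describes a list flag rather than a predicted mv;
+ *  check the argument names and order against FT_FIND_SKIP_PARAMS)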
+ *
+ * @returns
+ *       1) Updates skip MV in proc
+ *       2) Returns if the current MB can be coded as skip or not
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+ *   specification.
+ *
+ *******************************************************************************
+ */
+FT_FIND_SKIP_PARAMS isvce_find_pskip_params;
+
+/**
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a P skip MB
+ *
+ * @par Description:
+ *  The function computes the parameters for a P skip MB
+ *
+ * @param[in] ps_proc
+ *  Process context
+ *
+ * @param[in] u4_for_me
+ *  Flag to indicate the purpose of computing skip
+ *
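+ * @param[out] ps_pred_mv
+ *  Flag to indicate the current active reference list
+ *  (NOTE(review): this text describes a list flag rather than a predicted mv;
+ *  check the argument names and order against FT_FIND_SKIP_PARAMS)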
+ *
+ * @returns
+ *       1) Updates skip MV in proc
+ *       2) Returns if the current MB can be coded as skip or not
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+ *   specification.
+ *
+ *******************************************************************************
+ */
+FT_FIND_SKIP_PARAMS isvce_find_pskip_params_me;
+
+/**
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a B skip MB
+ *
+ * @par Description:
+ *  The function computes the parameters for a B skip MB
+ *
+ * @param[in] ps_proc
+ *  Process context
+ *
+ * @param[in] u4_for_me
+ *  Flag to indicate the purpose of computing skip
+ *
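+ * @param[out] ps_pred_mv
+ *  Flag to indicate the current active reference list
+ *  (NOTE(review): this text describes a list flag rather than a predicted mv;
+ *  check the argument names and order against FT_FIND_SKIP_PARAMS)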
+ *
+ * @returns
+ *       1) Updates skip MV in proc
+ *       2) Returns if the current MB can be coded as skip or not
+ *
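+ * @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+ *   specification.
+ *   NOTE(review): sec 8.4.1.1 covers skipped macroblocks in P and SP slices;
+ *   the derivation for B skip macroblocks is in sec 8.4.1.2. Confirm the
+ *   intended reference.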
+ *
+ *******************************************************************************
+ */
+FT_FIND_SKIP_PARAMS isvce_find_bskip_params;
+
+/**
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a B skip MB
+ *
+ * @par Description:
+ *  The function computes the parameters for a B skip MB
+ *
+ * @param[in] ps_proc
+ *  Process context
+ *
+ * @param[in] u4_for_me
+ *  Flag to indicate the purpose of computing skip
+ *
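+ * @param[out] ps_pred_mv
+ *  Flag to indicate the current active reference list
+ *  (NOTE(review): this text describes a list flag rather than a predicted mv;
+ *  check the argument names and order against FT_FIND_SKIP_PARAMS)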
+ *
+ * @returns
+ *       1) Updates skip MV in proc
+ *       2) The type of SKIP [L0/L1/BI]
+ *
+ * @remarks
+ *******************************************************************************
+ */
+FT_FIND_SKIP_PARAMS isvce_find_bskip_params_me;
+
+/**
+ *******************************************************************************
+ *
+ * @brief motion vector predictor
+ *
+ * @par Description:
+ *  The routine calculates the motion vector predictor for a given block,
+ *  given the candidate MV predictors.
+ *
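+ * @param[out] ps_pred_mv
+ *  pointer to the MV predictor of the current block (presumably written by
+ *  this function - confirm against the definition)
+ *
+ * @param[in] ps_neig_mv
+ *  pointer to the candidate predictors, i.e. the motion vector info of the
+ *  neighbouring mbs (presumably left, top & top right - confirm against the
+ *  definition)
+ *
+ * @param[in] pred_algo
+ *  selects the prediction algorithm; the accepted values are not visible in
+ *  this header
+ *
+ * @returns  none (the function is declared void); the x & y components of
+ *  the MV predictor are presumably returned through ps_pred_mv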
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+ *   specification.
+ *   Assumptions : 1. Assumes Only partition of size 16x16
+ *
+ *******************************************************************************
+ */
+void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv,
+                            WORD32 pred_algo);
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function evaluates ME for 2 reference lists
+ *
+ * @par Description:
+ *  It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
+ *
+ * @param[in] ps_proc
+ *  Process context corresponding to the job
+ *
+ * @returns  none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+FT_ME_ALGORITHM isvce_compute_me_multi_reflist;
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function evaluates ME for single reflist [Pred L0]
+ *
+ * @par Description:
+ *  It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
+ *
+ * @param[in] ps_proc
+ *  Process context corresponding to the job
+ *
+ * @returns  none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+FT_ME_ALGORITHM isvce_compute_me_single_reflist;
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function initializes me ctxt
+ *
+ * @par Description:
+ *  Before dispatching the current job to me thread, the me context associated
+ *  with the job is initialized.
+ *
+ * @param[in] ps_proc
+ *  Process context corresponding to the job
+ *
+ * @returns  none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void isvce_init_me(isvce_process_ctxt_t *ps_proc);
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function performs motion estimation for the current NMB
+ *
+ * @par Description:
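+ *  Initializes input and output pointers required by the function
+ *  isvce_compute_me and calls the function isvce_compute_me in a loop to
+ *  process NMBs.
+ *
+ * @param[in] ps_proc
+ *  Process context corresponding to the job
+ *
+ * @param[in] u4_nmb_count
+ *  Presumably the number of mbs in the current NMB group (confirm against
+ *  the definition)
+ *
+ * @returns  none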
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count);
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function performs MV prediction
+ *
+ * @par Description:
+ *
+ * @param[in] ps_proc
+ *  Process context corresponding to the job
+ *
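+ * @param[in] i4_reflist
+ *  Presumably the reference list for which the MV prediction is done
+ *  (confirm against the definition)
+ *
+ * @returns  none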
+ *
+ * @remarks none
+ *  This function will update the MB availability since intra inter decision
+ *  should be done before the call
+ *
+ *******************************************************************************
+ */
+void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist);
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function approximates Pred. MV
+ *
+ * @par Description:
+ *
+ * @param[in] ps_proc
+ *  Process context corresponding to the job
+ *
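+ * @param[in] i4_ref_list
+ *  Presumably the reference list for which the MV is approximated
+ *  (confirm against the definition)
+ *
+ * @returns  none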
+ *
+ * @remarks none
+ *  Motion estimation happens at nmb level. For cost calculations, mv is
+ *  approximated using this function
+ *
+ *******************************************************************************
+ */
+void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list);
+
+#endif
--- a/encoder/svc/isvce_mode_stat_visualiser.c
+++ b/encoder/svc/isvce_mode_stat_visualiser.c
@ -0,0 +1,191 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_mode_stat_visualiser.c
+*
+* @brief
+*  Contains functions used for synthesising analysis YUV
+*
+*******************************************************************************
+*/
+#include "isvce_defs.h"
+
+#if ENABLE_MODE_STAT_VISUALISER
+#include "ih264_typedefs.h"
+#include "isvc_macros.h"
+#include "ih264_debug.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_fmt_conv.h"
+#include "isvce_mode_stat_visualiser.h"
+
+/* Number of rows in the overlay colour table below */
+#define MAX_NUM_MB_MODE_VISUALISATIONS 1
+
+/* Path handed to fopen() by isvce_msv_ctxt_init */
+static const UWORD8 gau1_output_file_path[] = "out.yuv";
+
+/* Weight of the overlay colour when it is blended with a frame sample in */
+/* isvce_msv_set_mode; the frame sample gets (1 - gd_alpha) */
+static const double gd_alpha = 0.5;
+
+/* Overlay colours, indexed as [visualisation][component] */
+static const UWORD8 gau1_colors[MAX_NUM_MB_MODE_VISUALISATIONS][NUM_COMPONENTS] = {
+    /* Red */
+    {81, 90, 240},
+};
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns the number of bytes needed by the mode stat visualiser
+*
+* @par Description:
+*  The total is the context struct plus one 8-bit frame made of u4_wd * u4_ht
+*  luma samples and two chroma planes of a quarter of that sample count each
+*
+* @param[in] u4_wd
+*  Frame width in samples
+*
+* @param[in] u4_ht
+*  Frame height in samples
+*
+* @returns size in bytes
+*
+*******************************************************************************
+*/
+UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht)
+{
+    WORD32 i4_luma = u4_wd * u4_ht;
+    WORD32 i4_chroma = i4_luma / 4;
+    WORD32 i4_frame_samples = i4_luma + 2 * i4_chroma;
+
+    return (UWORD32) (sizeof(mode_stat_visualiser_t) + i4_frame_samples * sizeof(UWORD8));
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialises the mode stat visualiser context inside caller-provided memory
+*
+* @par Description:
+*  Places the context struct at the start of ps_mem_rec->pv_base and lays out
+*  one plane per component behind it. The frame is described as 8-bit
+*  IV_YUV_420P with the width and height taken from the codec config; non-Y
+*  components use half the luma stride. The output file is opened as well.
+*
+* @param[in,out] ps_codec
+*  Codec context; s_cfg.u4_wd and s_cfg.u4_ht are read and
+*  ps_mode_stat_visualiser is set
+*
+* @param[in] ps_mem_rec
+*  Memory record whose pv_base is carved up; it is expected to hold at least
+*  isvce_get_msv_ctxt_size(u4_wd, u4_ht) bytes
+*
+* @returns none
+*
+* @remarks
+*  A failed fopen is not reported; ps_output_file is NULL in that case
+*
+*******************************************************************************
+*/
+void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
+{
+    mode_stat_visualiser_t *ps_mode_stat_visualiser;
+    yuv_buf_props_t *ps_frame_buf;
+
+    WORD32 i;
+
+    UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
+    UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
+    WORD32 i4_num_luma_samples = u4_wd * u4_ht;
+    WORD32 i4_num_chroma_samples = i4_num_luma_samples / 4;
+    UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+    /* Signed running budget so that over-consumption shows up as negative */
+    WORD64 i8_alloc_mem_size = isvce_get_msv_ctxt_size(u4_wd, u4_ht);
+
+    ps_mode_stat_visualiser = ps_codec->ps_mode_stat_visualiser =
+        (mode_stat_visualiser_t *) pu1_buf;
+    pu1_buf += sizeof(ps_mode_stat_visualiser[0]);
+    i8_alloc_mem_size -= sizeof(ps_mode_stat_visualiser[0]);
+
+    ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;
+
+    /* Raw YUV is binary data; "wb" prevents newline translation on platforms */
+    /* that distinguish text and binary streams */
+    ps_mode_stat_visualiser->ps_output_file = fopen((const char *) gau1_output_file_path, "wb");
+
+    ps_frame_buf->e_color_format = IV_YUV_420P;
+    ps_frame_buf->u1_bit_depth = 8;
+    ps_frame_buf->u4_width = u4_wd;
+    ps_frame_buf->u4_height = u4_ht;
+
+    for(i = 0; i < NUM_COMPONENTS; i++)
+    {
+        UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
+        UWORD32 u4_buf_size = u1_is_chroma ? i4_num_chroma_samples : i4_num_luma_samples;
+        UWORD32 u4_stride = u4_wd >> u1_is_chroma;
+
+        ps_frame_buf->as_component_bufs[i].pv_data = pu1_buf;
+        ps_frame_buf->as_component_bufs[i].i4_data_stride = u4_stride;
+
+        pu1_buf += u4_buf_size;
+        i8_alloc_mem_size -= u4_buf_size;
+    }
+
+    /* The planes must not have consumed more than the size function reported */
+    ASSERT(i8_alloc_mem_size >= 0);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Releases the resources held by the mode stat visualiser context
+*
+* @par Description:
+*  Closes the output file if one is open and clears the handle so that a
+*  repeated call is harmless. The context memory itself is not freed here.
+*
+* @param[in,out] ps_mode_stat_visualiser
+*  Visualiser context
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser)
+{
+    /* The handle is NULL when fopen failed; fclose(NULL) is undefined */
+    if(NULL != ps_mode_stat_visualiser->ps_output_file)
+    {
+        fclose(ps_mode_stat_visualiser->ps_output_file);
+        ps_mode_stat_visualiser->ps_output_file = NULL;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Copies the highest spatial layer of the input into the visualiser frame
+*
+* @par Description:
+*  Picks the layer at index (u1_num_spatial_layers - 1) of the input buffer
+*  and converts it from semi-planar 4:2:0 to the planar frame buffer owned by
+*  the visualiser using isvce_fmt_conv_420sp_to_420p
+*
+* @param[in,out] ps_mode_stat_visualiser
+*  Visualiser context; its frame buffer is overwritten
+*
+* @param[in] ps_inp_buf
+*  Input buffer holding the per-layer YUV buffers and the SVC params
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
+                               isvce_inp_buf_t *ps_inp_buf)
+{
+    svc_params_t *ps_svc_params = &ps_inp_buf->s_svc_params;
+    yuv_buf_props_t *ps_target_layer_yuv_buf =
+        &ps_inp_buf->as_layer_yuv_buf_props[ps_svc_params->u1_num_spatial_layers - 1];
+    yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;
+
+    ASSERT(ps_target_layer_yuv_buf->u4_width == ps_frame_buf->u4_width);
+    ASSERT(ps_target_layer_yuv_buf->u4_height == ps_frame_buf->u4_height);
+    ASSERT(ps_target_layer_yuv_buf->u1_bit_depth == ps_frame_buf->u1_bit_depth);
+    ASSERT(ps_target_layer_yuv_buf->e_color_format == IV_YUV_420SP_UV);
+    /* The destination has to be planar; check its colour format, not its bit depth */
+    ASSERT(ps_frame_buf->e_color_format == IV_YUV_420P);
+    /* Only the destination U stride is handed to the converter below, so the */
+    /* destination U and V planes must share it */
+    ASSERT(ps_frame_buf->as_component_bufs[U].i4_data_stride ==
+           ps_frame_buf->as_component_bufs[V].i4_data_stride);
+
+    /* NOTE(review): the trailing arguments (1, 0) are flags of the converter; */
+    /* presumably 'U first' and 'do not skip luma copy' - confirm */
+    isvce_fmt_conv_420sp_to_420p(
+        ps_target_layer_yuv_buf->as_component_bufs[Y].pv_data,
+        ps_target_layer_yuv_buf->as_component_bufs[UV].pv_data,
+        ps_frame_buf->as_component_bufs[Y].pv_data, ps_frame_buf->as_component_bufs[U].pv_data,
+        ps_frame_buf->as_component_bufs[V].pv_data, ps_frame_buf->u4_width, ps_frame_buf->u4_height,
+        ps_target_layer_yuv_buf->as_component_bufs[Y].i4_data_stride,
+        ps_target_layer_yuv_buf->as_component_bufs[UV].i4_data_stride,
+        ps_frame_buf->as_component_bufs[Y].i4_data_stride,
+        ps_frame_buf->as_component_bufs[U].i4_data_stride, 1, 0);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Tints one MB of the visualiser frame according to its mode info
+*
+* @par Description:
+*  When the MB has u1_residual_prediction_flag set, every sample of the MB in
+*  every component is replaced by a gd_alpha weighted blend of the first
+*  overlay colour and the existing sample, rounded to nearest. MBs without
+*  the flag are left untouched.
+*
+* @param[in,out] ps_mode_stat_visualiser
+*  Visualiser context whose frame buffer is modified
+*
+* @param[in] ps_mb_info
+*  Mode info of the MB
+*
+* @param[in] ps_mb_pos
+*  MB position in MB units (abscissa = column, ordinate = row)
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
+                        isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos)
+{
+    UWORD32 u4_comp, u4_row, u4_col;
+
+    yuv_buf_props_t *ps_frame = &ps_mode_stat_visualiser->s_frame_buf;
+
+    /* Nothing is drawn for MBs that do not use residual prediction */
+    if(!ps_mb_info->u1_residual_prediction_flag)
+    {
+        return;
+    }
+
+    for(u4_comp = 0; u4_comp < NUM_COMPONENTS; u4_comp++)
+    {
+        /* Non-Y components cover half the MB size in each direction */
+        UWORD8 u1_chroma = (Y != ((COMPONENT_TYPE) u4_comp));
+        UWORD32 u4_blk_wd = MB_SIZE >> u1_chroma;
+        UWORD32 u4_blk_ht = MB_SIZE >> u1_chroma;
+        WORD32 i4_pitch = ps_frame->as_component_bufs[u4_comp].i4_data_stride;
+        UWORD8 *pu1_mb = ps_frame->as_component_bufs[u4_comp].pv_data;
+
+        /* Move to the top-left sample of the MB within this plane */
+        pu1_mb += ps_mb_pos->i4_abscissa * u4_blk_wd + ps_mb_pos->i4_ordinate * u4_blk_ht * i4_pitch;
+
+        for(u4_row = 0; u4_row < u4_blk_ht; u4_row++)
+        {
+            for(u4_col = 0; u4_col < u4_blk_wd; u4_col++)
+            {
+                UWORD32 u4_idx = u4_col + u4_row * i4_pitch;
+
+                pu1_mb[u4_idx] = (UWORD8) (gd_alpha * gau1_colors[0][u4_comp] +
+                                           (1. - gd_alpha) * pu1_mb[u4_idx] + 0.5);
+            }
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Appends the visualiser frame to the output file
+*
+* @par Description:
+*  Writes each component plane in index order as one contiguous run of
+*  width * height samples (halved in both directions for non-Y components).
+*  Nothing is written when the output file is not open.
+*
+* @param[in] ps_mode_stat_visualiser
+*  Visualiser context
+*
+* @returns none
+*
+* @remarks
+*  Short writes are not reported
+*
+*******************************************************************************
+*/
+void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser)
+{
+    WORD32 i;
+
+    FILE *ps_output_file = ps_mode_stat_visualiser->ps_output_file;
+    yuv_buf_props_t *ps_frame_buf = &ps_mode_stat_visualiser->s_frame_buf;
+
+    /* The handle is NULL when fopen failed; fwrite on a NULL stream is undefined */
+    if(NULL == ps_output_file)
+    {
+        return;
+    }
+
+    for(i = 0; i < NUM_COMPONENTS; i++)
+    {
+        UWORD8 u1_is_chroma = (((COMPONENT_TYPE) i) != Y);
+        UWORD32 u4_wd = ps_frame_buf->u4_width >> u1_is_chroma;
+        UWORD32 u4_ht = ps_frame_buf->u4_height >> u1_is_chroma;
+        UWORD32 u4_size = u4_wd * u4_ht;
+
+        /* A single fwrite per plane is only valid when rows are packed */
+        ASSERT(u4_wd == ps_frame_buf->as_component_bufs[i].i4_data_stride);
+
+        fwrite(ps_frame_buf->as_component_bufs[i].pv_data, sizeof(UWORD8), u4_size, ps_output_file);
+    }
+}
+#endif
--- a/encoder/svc/isvce_mode_stat_visualiser.h
+++ b/encoder/svc/isvce_mode_stat_visualiser.h
@ -0,0 +1,72 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_mode_stat_visualiser.h
+*
+* @brief
+*  Contains declarations for the functions defined in
+*  isvce_mode_stat_visualiser.c
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_MODE_STAT_VISUALISER_H_
+#define _ISVCE_MODE_STAT_VISUALISER_H_
+#if ENABLE_MODE_STAT_VISUALISER
+
+#include <stdio.h>
+
+#include "ih264_typedefs.h"
+#include "isvc_structs.h"
+#include "isvce_structs.h"
+
+/**
+ * Context of the mode stat visualiser
+ */
+typedef struct mode_stat_visualiser_t
+{
+    /**
+     *  Stream the visualisation frames are written to
+     */
+    FILE *ps_output_file;
+
+    /**
+     *  Frame the per-MB overlays are drawn on
+     */
+    yuv_buf_props_t s_frame_buf;
+
+} mode_stat_visualiser_t;
+
+/* Returns the number of bytes the visualiser needs for a u4_wd x u4_ht frame */
+extern UWORD32 isvce_get_msv_ctxt_size(UWORD32 u4_wd, UWORD32 u4_ht);
+
+/* Sets up the visualiser context in ps_mem_rec and stores it in ps_codec */
+extern void isvce_msv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec);
+
+/* Closes the visualiser's output file */
+extern void isvce_msv_ctxt_delete(mode_stat_visualiser_t *ps_mode_stat_visualiser);
+
+/* Copies the highest spatial layer of ps_inp_buf into the visualiser frame */
+extern void isvce_msv_get_input_frame(mode_stat_visualiser_t *ps_mode_stat_visualiser,
+                                      isvce_inp_buf_t *ps_inp_buf);
+
+/* Writes the visualiser frame to the output file */
+extern void isvce_msv_dump_visualisation(mode_stat_visualiser_t *ps_mode_stat_visualiser);
+
+/* Overlays the mode of the MB at ps_mb_pos onto the visualiser frame */
+extern void isvce_msv_set_mode(mode_stat_visualiser_t *ps_mode_stat_visualiser,
+                               isvce_mb_info_t *ps_mb_info, coordinates_t *ps_mb_pos);
+#endif
+
+#endif
--- a/encoder/svc/isvce_nalu_stat_aggregator.c
+++ b/encoder/svc/isvce_nalu_stat_aggregator.c
@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_nalu_stat_aggregator.c
+*
+* @brief
+*  Contains objects used for aggregating nalu statistics
+*
+*******************************************************************************
+*/
+#include <stdio.h>
+#include <string.h>
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "isvce_structs.h"
+#include "isvce_nalu_stat_aggregator.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets the NALU count of every spatial layer's descriptor
+*
+* @param[in,out] ps_nalu_descriptor
+*  Array of descriptors, one per spatial layer
+*
+* @param[in] u1_num_spatial_layers
+*  Number of entries of ps_nalu_descriptor to reset
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor, UWORD8 u1_num_spatial_layers)
+{
+    nalu_descriptors_t *ps_layer = ps_nalu_descriptor;
+    UWORD8 u1_layers_left = u1_num_spatial_layers;
+
+    while(u1_layers_left > 0)
+    {
+        ps_layer->u1_num_nalus = 0;
+
+        ps_layer++;
+        u1_layers_left--;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Appends one CSV line per NALU of a descriptor to a text buffer
+*
+* @par Description:
+*  Each line has seven comma separated fields. The first two are the NALU
+*  type and its size in bytes (i8_num_bits / 8). For VCL NALUs the remaining
+*  fields are spatial layer id, temporal layer id, IDR flag, 1, 1; for all
+*  other NALUs they are -1. Lines are appended at offset u4_num_bytes of the
+*  buffer and u4_num_bytes is advanced by the characters actually stored, so
+*  the buffer stays NUL terminated and is never overrun. Output that does not
+*  fit is truncated.
+*
+* @param[in] ps_nalu_descriptor
+*  Descriptor whose first u1_num_nalus entries are translated
+*
+* @param[in,out] ps_csv_buf
+*  Destination buffer (pu1_buf, u4_buf_size) and its fill level (u4_num_bytes)
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor,
+                                    isvce_nalu_info_buf_t *ps_csv_buf)
+{
+    char ac_csv_string[MAX_BYTES_PER_NALU_INFO];
+    WORD32 i;
+
+    for(i = 0; i < ps_nalu_descriptor->u1_num_nalus; i++)
+    {
+        UWORD32 u4_num_bytes_available;
+        WORD32 i4_num_chars;
+
+        /* Compare before subtracting: the difference of the two unsigned */
+        /* fields can never be negative, it would wrap to a huge size instead */
+        ASSERT(ps_csv_buf->u4_num_bytes <= ps_csv_buf->u4_buf_size);
+
+        if(ps_csv_buf->u4_num_bytes >= ps_csv_buf->u4_buf_size)
+        {
+            break;
+        }
+
+        u4_num_bytes_available = ps_csv_buf->u4_buf_size - ps_csv_buf->u4_num_bytes;
+
+        if(ps_nalu_descriptor->as_nalu_info[i].b_is_vcl_nal)
+        {
+            snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n",
+                     ps_nalu_descriptor->as_nalu_info[i].e_nalu_type,
+                     (UWORD32) (ps_nalu_descriptor->as_nalu_info[i].i8_num_bits / 8),
+                     ps_nalu_descriptor->as_nalu_info[i].u1_spatial_layer_id,
+                     ps_nalu_descriptor->as_nalu_info[i].u1_temporal_layer_id,
+                     ps_nalu_descriptor->as_nalu_info[i].b_is_idr, 1, 1);
+        }
+        else
+        {
+            snprintf(ac_csv_string, MAX_BYTES_PER_NALU_INFO, "%d,%u,%d,%d,%d,%d,%d\n",
+                     ps_nalu_descriptor->as_nalu_info[i].e_nalu_type,
+                     (UWORD32) (ps_nalu_descriptor->as_nalu_info[i].i8_num_bits / 8), -1, -1, -1,
+                     -1, -1);
+        }
+
+        i4_num_chars = snprintf((char *) (ps_csv_buf->pu1_buf + ps_csv_buf->u4_num_bytes),
+                                u4_num_bytes_available, "%s", ac_csv_string);
+
+        if(i4_num_chars < 0)
+        {
+            break;
+        }
+
+        /* snprintf reports the untruncated length; at most (available - 1) */
+        /* characters plus the terminator were actually stored */
+        if(((UWORD32) i4_num_chars) >= u4_num_bytes_available)
+        {
+            ps_csv_buf->u4_num_bytes += u4_num_bytes_available - 1;
+        }
+        else
+        {
+            ps_csv_buf->u4_num_bytes += (UWORD32) i4_num_chars;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns the first unused NALU info slot of a descriptor
+*
+* @param[in] ps_nalu_descriptor
+*  Descriptor to take the slot from
+*
+* @returns pointer to as_nalu_info[u1_num_nalus]
+*
+* @remarks
+*  The index is not checked against MAX_NALU_PER_LAYER and the count is not
+*  advanced here
+*
+*******************************************************************************
+*/
+nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor)
+{
+    UWORD8 u1_next_slot = ps_nalu_descriptor->u1_num_nalus;
+    nalu_info_t *ps_slots = ps_nalu_descriptor->as_nalu_info;
+
+    return ps_slots + u1_next_slot;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills a NALU info entry
+*
+* @par Description:
+*  Type, initial bit count and IDR flag are always stored. NALUs of type
+*  NAL_SLICE_NON_IDR, NAL_SLICE_IDR or NAL_CODED_SLICE_EXTENSION are marked as
+*  VCL and also get the layer ids and slice count; for every other type those
+*  three fields are left unmodified.
+*
+* @param[out] ps_nalu_info
+*  Entry to fill
+*
+* @param[in] i8_init_bits
+*  Value stored into i8_num_bits
+*
+* @param[in] e_nalu_type
+*  NAL unit type
+*
+* @param[in] u1_spatial_layer_id
+*  Spatial layer id; used for VCL NALUs only
+*
+* @param[in] u1_temporal_layer_id
+*  Temporal layer id; used for VCL NALUs only
+*
+* @param[in] u1_num_slices
+*  Slice count; used for VCL NALUs only
+*
+* @param[in] b_is_idr
+*  IDR flag
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bits,
+                              NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id,
+                              UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices, bool b_is_idr)
+{
+    bool b_vcl = (NAL_SLICE_NON_IDR == e_nalu_type) || (NAL_SLICE_IDR == e_nalu_type) ||
+                 (NAL_CODED_SLICE_EXTENSION == e_nalu_type);
+
+    ps_nalu_info->e_nalu_type = e_nalu_type;
+    ps_nalu_info->i8_num_bits = i8_init_bits;
+    ps_nalu_info->b_is_idr = b_is_idr;
+    ps_nalu_info->b_is_vcl_nal = b_vcl;
+
+    if(!b_vcl)
+    {
+        return;
+    }
+
+    ps_nalu_info->u1_spatial_layer_id = u1_spatial_layer_id;
+    ps_nalu_info->u1_temporal_layer_id = u1_temporal_layer_id;
+    ps_nalu_info->u1_num_slices = u1_num_slices;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Increments the NALU count of a descriptor by one
+*
+* @param[in,out] ps_nalu_descriptor
+*  Descriptor to update
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor)
+{
+    ps_nalu_descriptor->u1_num_nalus = (UWORD8) (ps_nalu_descriptor->u1_num_nalus + 1);
+}
--- a/encoder/svc/isvce_nalu_stat_aggregator.h
+++ b/encoder/svc/isvce_nalu_stat_aggregator.h
@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_nalu_stat_aggregator.h
+*
+* @brief
+*  Contains objects used for aggregating nalu statistics
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_NALU_STAT_AGGREGATOR_H_
+#define _ISVCE_NALU_STAT_AGGREGATOR_H_
+
+#include <stdbool.h>
+
+#include "ih264_typedefs.h"
+#include "isvce.h"
+#include "isvc_defs.h"
+#include "isvce_defs.h"
+
+/* Macros */
+/* Size of the scratch buffer that holds the CSV line of one NALU; */
+/* +1 for '\0' */
+#define MAX_BYTES_PER_NALU_INFO (45 + 1)
+
+/* Capacity of nalu_descriptors_t::as_nalu_info, budgeted as */
+/* SPS + (MAX_NUM_SPATIAL_LAYERS - 1) * SUBSET_SPS + */
+/* MAX_NUM_SPATIAL_LAYERS * PPS + */
+/* 1 PREFIX_NALU + 1 SLICE_[NON|]IDR + */
+/* (MAX_NUM_SPATIAL_LAYERS - 1) * CODED_SLICE_EXTENSION */
+#define MAX_NALU_PER_LAYER 10
+
+/* Structs */
+/**
+ * Statistics of a single NAL unit
+ */
+typedef struct nalu_info_t
+{
+    /* NAL unit type */
+    NAL_UNIT_TYPE_T e_nalu_type;
+
+    /* Size of the NALU in bits; reported in bytes (divided by 8) in the CSV */
+    WORD64 i8_num_bits;
+
+    /* Set for NAL_SLICE_NON_IDR, NAL_SLICE_IDR and NAL_CODED_SLICE_EXTENSION */
+    bool b_is_vcl_nal;
+
+    /* IDR flag as passed to isvce_nalu_info_buf_init */
+    bool b_is_idr;
+
+    /* The three fields below are only written for VCL NALUs */
+    UWORD8 u1_spatial_layer_id;
+
+    UWORD8 u1_temporal_layer_id;
+
+    UWORD8 u1_num_slices;
+} nalu_info_t;
+
+/**
+ * NALU statistics collected for one spatial layer
+ */
+typedef struct nalu_descriptors_t
+{
+    /* Per-NALU entries; the first u1_num_nalus are in use */
+    nalu_info_t as_nalu_info[MAX_NALU_PER_LAYER];
+
+    /* Number of valid entries in as_nalu_info */
+    UWORD8 u1_num_nalus;
+
+} nalu_descriptors_t;
+
+/* Function declarations */
+/* Returns the number of bytes that hold the CSV text of MAX_NALU_PER_LAYER */
+/* NALUs for each of u1_num_spatial_layers layers */
+static FORCEINLINE UWORD32 isvce_get_nalu_info_buf_size(UWORD8 u1_num_spatial_layers)
+{
+    WORD32 i4_max_nalus = MAX_NALU_PER_LAYER * u1_num_spatial_layers;
+
+    return i4_max_nalus * MAX_BYTES_PER_NALU_INFO;
+}
+
+/* Resets the NALU count of each of the u1_num_spatial_layers descriptors */
+extern void isvce_nalu_info_au_init(nalu_descriptors_t *ps_nalu_descriptor,
+                                    UWORD8 u1_num_spatial_layers);
+
+/* Appends one CSV line per NALU of the descriptor to ps_csv_buf */
+extern void isvce_nalu_info_csv_translator(nalu_descriptors_t *ps_nalu_descriptor,
+                                           isvce_nalu_info_buf_t *ps_csv_buf);
+
+/* Returns the first unused entry of the descriptor without claiming it */
+extern nalu_info_t *isvce_get_next_nalu_info_buf(nalu_descriptors_t *ps_nalu_descriptor);
+
+/* Fills ps_nalu_info; i8_init_bits is the initial size of the NALU in bits. */
+/* Layer ids and slice count are stored for VCL NALU types only. */
+extern void isvce_nalu_info_buf_init(nalu_info_t *ps_nalu_info, WORD64 i8_init_bits,
+                                     NAL_UNIT_TYPE_T e_nalu_type, UWORD8 u1_spatial_layer_id,
+                                     UWORD8 u1_temporal_layer_id, UWORD8 u1_num_slices,
+                                     bool b_is_idr);
+
+/* Increments the NALU count of the descriptor by one */
+extern void isvce_update_nalu_count(nalu_descriptors_t *ps_nalu_descriptor);
+
+#endif
--- a/encoder/svc/isvce_pred_structs.h
+++ b/encoder/svc/isvce_pred_structs.h
@ -0,0 +1,156 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_pred_structs.h
+*
+* @brief
+*  Contains struct definition used for prediction
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_PRED_STRUCTS_H_
+#define _ISVCE_PRED_STRUCTS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_defs.h"
+#include "isvc_structs.h"
+#include "isvce_defs.h"
+
+/**
+ * PU information: motion data of a PU for a single prediction direction
+ */
+typedef struct
+{
+    /**
+     *  Motion Vector
+     */
+    mv_t s_mv;
+
+    /**
+     *  Ref index
+     *  NOTE(review): signed; presumably a negative value marks an unused
+     *  direction - confirm
+     */
+    WORD8 i1_ref_idx;
+
+} isvce_enc_pu_mv_t;
+
+/*
+ * Total PU info for an MB
+ */
+typedef struct isvce_enc_pu_t
+{
+    /* Array with ME info for all lists; one entry per prediction direction */
+    isvce_enc_pu_mv_t as_me_info[NUM_PRED_DIRS];
+
+    /* One index per prediction direction */
+    /* NOTE(review): presumably the MV predictor index chosen for that */
+    /* direction - confirm */
+    UWORD8 au1_mvp_idx[NUM_PRED_DIRS];
+
+    /**
+     *  PU X position in terms of min PU (4x4) units
+     */
+    UWORD8 u1_pos_x_in_4x4;
+
+    /**
+     *  PU Y position in terms of min PU (4x4) units
+     */
+    UWORD8 u1_pos_y_in_4x4;
+
+    /**
+     *  PU width in 4x4 units, minus 1
+     *  PU width in pixels = (u1_wd_in_4x4_m1 + 1) << 2
+     */
+    UWORD8 u1_wd_in_4x4_m1;
+
+    /**
+     *  PU height in 4x4 units, minus 1
+     *  PU height in pixels = (u1_ht_in_4x4_m1 + 1) << 2
+     */
+    UWORD8 u1_ht_in_4x4_m1;
+
+    /**
+     *  PRED_L0, PRED_L1, PRED_BI
+     */
+    UWORD8 u1_pred_mode;
+
+} isvce_enc_pu_t;
+
+/**
+ * Intra mode data of one 4x4 block
+ */
+typedef struct intra4x4_mode_data_t
+{
+    /* NOTE(review): presumably the mode predicted from neighbouring blocks - */
+    /* confirm */
+    UWORD8 u1_predicted_mode;
+
+    /* NOTE(review): presumably the mode actually selected for the block - */
+    /* confirm */
+    UWORD8 u1_mode;
+
+} intra4x4_mode_data_t;
+
+/* 8x8 blocks carry the same pair of fields as 4x4 blocks */
+typedef intra4x4_mode_data_t intra8x8_mode_data_t;
+
+/**
+ * Intra mode data of a 16x16 block; holds a mode only, no predicted mode
+ */
+typedef struct intra16x16_mode_data_t
+{
+    UWORD8 u1_mode;
+
+} intra16x16_mode_data_t;
+
+/**
+ * Intra prediction info of an MB for every intra partition size
+ */
+typedef struct enc_intra_pu_t
+{
+    /* One entry per 4x4 block; MAX_TU_IN_MB entries */
+    intra4x4_mode_data_t as_i4x4_mode_data[MAX_TU_IN_MB];
+
+    /* One entry per 8x8 block; MIN_TU_IN_MB entries */
+    intra8x8_mode_data_t as_i8x8_mode_data[MIN_TU_IN_MB];
+
+    /* Mode data for 16x16 prediction */
+    intra16x16_mode_data_t s_i16x16_mode_data;
+
+    /* Intra mode of the chroma blocks */
+    UWORD8 u1_chroma_intra_mode;
+
+} enc_intra_pu_t;
+
+/**
+ * Mode information of one MB
+ */
+typedef struct isvce_mb_info_t
+{
+    /* Inter PU info; up to ENC_MAX_PU_IN_MB PUs */
+    isvce_enc_pu_t as_pu[ENC_MAX_PU_IN_MB];
+
+    /* Intra prediction info */
+    enc_intra_pu_t s_intra_pu;
+
+    /* NOTE(review): presumably the coded block pattern - confirm */
+    UWORD32 u4_cbp;
+
+    /* NOTE(review): presumably the coded sub-block pattern - confirm */
+    UWORD32 u4_csbp;
+
+    /* NOTE(review): presumably the coded sub-block pattern of the residual */
+    /* used for inter-layer residual prediction - confirm */
+    UWORD32 u4_res_csbp;
+
+    UWORD16 u2_mb_type;
+
+    WORD32 i4_mb_distortion;
+
+    UWORD8 u1_base_mode_flag;
+
+    /* Non-zero when the MB uses residual prediction; MBs with this flag set */
+    /* are the ones tinted by isvce_msv_set_mode */
+    UWORD8 u1_residual_prediction_flag;
+
+    UWORD8 u1_tx_size;
+
+    UWORD8 u1_mb_qp;
+
+    UWORD8 u1_is_intra;
+
+} isvce_mb_info_t;
+
+#endif
--- a/encoder/svc/isvce_process.c
+++ b/encoder/svc/isvce_process.c
--- a/encoder/svc/isvce_process.h
+++ b/encoder/svc/isvce_process.h
@ -0,0 +1,285 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_process.h
+*
+* @brief
+*  Contains functions for codec thread
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_PROCESS_H_
+#define _ISVCE_PROCESS_H_
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief This function generates sps, pps set on request
+*
+*  @par   Description
+*  When the encoder is set in header generation mode, the following function
+*  is called. This generates sps and pps headers and returns the control back
+*  to caller.
+*
+*  @param[in]    ps_codec
+*  pointer to codec context
+*
+*  @param[in]    ps_inp_buf
+*  pointer to an input buffer
+*  NOTE(review): how the headers depend on it is not visible from this
+*  declaration - confirm against the definition
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T isvce_generate_sps_pps(isvce_codec_t *ps_codec, isvce_inp_buf_t *ps_inp_buf);
+
+/**
+*******************************************************************************
+*
+* @brief   initialize entropy context.
+*
+* @par Description:
+*  Before invoking the call to perform entropy coding, the entropy context
+*  associated with the job needs to be initialized. This involves the start
+*  mb address, end mb address, slice index and the pointer to the location at
+*  which the mb residue info and mb header info are packed.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_init_entropy_ctxt(isvce_process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+*  This function calls lower level functions to perform entropy coding for a
+*  group (n rows) of mb's. After encoding 1 row of mb's, the function takes
+*  back the control, updates the ctxt and calls lower level functions again.
+*  This process is repeated till all the rows or the group of mb's (whichever
+*  is minimum) are coded
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns  error status
+*
+* @remarks
+*  NOTE : It is assumed that this routine is invoked at the start of a slice,
+*  so the slice header is generated by default.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_entropy(isvce_process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb into a buffer
+*
+* @par Description:
+*  After deciding the mode info of a macroblock, the syntax elements
+*  associated with the mb are packed and stored. The entropy thread unpacks
+*  this buffer and generates the end bit stream.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_pack_header_data(isvce_process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief   update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+*  This function performs house keeping tasks after encoding an mb.
+*  After encoding an mb, various elements of the process context need to be
+*  updated to encode the next mb. For instance, the source, recon and reference
+*  pointers, mb indices have to be adjusted to the next mb. The slice index of
+*  the current mb needs to be updated. If mb qp modulation is enabled, then if
+*  the qp changes the quant param structure needs to be updated. Also, to
+*  encode the next mb, the current mb info is used as part of mode prediction
+*  or mv prediction. Hence the current mb info has to be preserved at top/top
+*  left/left locations.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns a WORD32 value
+*  NOTE(review): presumably an error status like the neighbouring functions;
+*  confirm against the definition
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 isvce_update_proc_ctxt(isvce_process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief   initialize process context.
+*
+* @par Description:
+*  Before dispatching the current job to process thread, the process context
+*  associated with the job is initialized. Usually every job aims to encode one
+*  row of mb's. Based on the row indices provided by the job, the process
+*  context's buffer ptrs, slice indices and other elements that are necessary
+*  during core-coding are initialized.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_init_proc_ctxt(isvce_process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Pointer to luma buffer
+*
+* @param[in] i4_luma_stride
+*  Stride of the luma buffer
+*
+* @param[in] pu1_curr_pic_chroma
+*  Pointer to chroma buffer
+*
+* @param[in] i4_chroma_stride
+*  Stride of the chroma buffer
+*
+* @param[in] i4_mb_x
+*  mb index x
+*
+* @param[in] i4_mb_y
+*  mb index y
+*
+* @param[in] i4_pad_ht
+*  number of rows to be padded
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_pad_recon_buffer(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma,
+                                      WORD32 i4_luma_stride, UWORD8 *pu1_curr_pic_chroma,
+                                      WORD32 i4_chroma_stride, WORD32 i4_mb_x, WORD32 i4_mb_y,
+                                      WORD32 i4_pad_ht);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma half pel planes generation
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Pointer to luma buffer
+*
+* @param[in] i4_mb_x
+*  mb index x
+*
+* @param[in] i4_mb_y
+*  mb index y
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T isvce_halfpel_generation(isvce_process_ctxt_t *ps_proc, UWORD8 *pu1_curr_pic_luma,
+                                        WORD32 i4_mb_x, WORD32 i4_mb_y);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+*  The mb to be coded is taken and is evaluated over a predefined set of modes
+*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least
+*  cost is selected and using intra/inter prediction filters, prediction is
+*  carried out. The deviation between src and pred signal constitutes error
+*  signal. This error signal is transformed (hierarchical transform if
+*  necessary) and quantized. The quantized residue is packed into the entropy
+*  buffer for entropy coding. This is repeated for all the mb's enlisted under
+*  the job.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 isvce_process(isvce_process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  entry point of a spawned encoder thread
+*
+* @par Description:
+*  The encoder thread dequeues a proc/entropy job from the encoder queue and
+*  calls necessary routines.
+*
+* @param[in] pv_proc
+*  Process context corresponding to the thread, passed as an untyped pointer
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 isvce_process_thread(void *pv_proc);
+
+#endif
--- a/encoder/svc/isvce_rate_control.c
+++ b/encoder/svc/isvce_rate_control.c
@ -0,0 +1,716 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_rate_control.c
+*
+* @brief
+*  Contains api function definitions for h264 rate control
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_rc_init()
+*  - isvce_rc_get_picture_details()
+*  - isvce_update_rc_framerates()
+*  - isvce_update_rc_mb_info()
+*  - isvce_rc_get_buffer_status()
+*  - isvce_rc_post_enc()
+*  - isvce_update_rc_bits_info()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+#include "ih264_typedefs.h"
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "isvce.h"
+#include "isvc_defs.h"
+#include "isvc_macros.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "isvc_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_common_tables.h"
+#include "isvc_cabac_tables.h"
+#include "isvce_defs.h"
+#include "isvce_globals.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "isvce_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_utils.h"
+#include "irc_trace_support.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initializes the rate control context and the frame-time helper contexts
+*
+* @par Description
+*  Sets up the frame time, pull down frame rate and time stamp contexts from
+*  the supplied frame rates, and then initializes rate control with the rc
+*  type, bitrates, buffer delay, intra/inter frame intervals, qp arrays and a
+*  CPB size looked up from the level table.
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api
+*
+* @param[in] pv_frame_time
+*  Handle to frame time context
+*
+* @param[in] pv_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+*  Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @param[in] e_rate_control_type
+*  Rate control type
+*
+* @param[in] u4_avg_bit_rate
+*  Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+*  Peak bit rate; not used when the rc type is CBR_NLDRC
+*
+* @param[in] u4_max_delay
+*  Maximum delay between frames
+*
+* @param[in] u4_intra_frame_interval
+*  Intra frame interval
+*
+* @param[in] i4_inter_frm_int
+*  Inter frame interval
+*
+* @param[in] pu1_init_qp
+*  Initial qp
+*
+* @param[in] i4_max_inter_frm_int
+*  Maximum inter frame interval. The value passed in is not used; it is
+*  recomputed from i4_inter_frm_int before rate control is initialized
+*
+* @param[in] pu1_min_max_qp
+*  Array of min/max qp
+*
+* @param[in] u1_profile_level
+*  Encoder profile level
+*
+* @returns none
+*
+* @remarks
+*  Rate control is always initialized with a closed GOP, a minimum bitrate of
+*  0 and mb activity switched off.
+*
+*******************************************************************************
+*/
+void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
+                   UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
+                   rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
+                   UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
+                   UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
+                   UWORD8 u1_profile_level)
+{
+    /* CPB size is the level table entry scaled by 1200 */
+    const UWORD8 u1_lvl_idx = ih264e_get_lvl_idx(u1_profile_level);
+    const UWORD32 u4_cpb_size = 1200 * gas_isvc_lvl_tbl[u1_lvl_idx].u4_max_cpb_size;
+
+    /* CBR_NLDRC uses the average bitrate as its peak; every other rc type
+     * uses the caller supplied peak bitrate */
+    const UWORD32 u4_peak =
+        (CBR_NLDRC == e_rate_control_type) ? u4_avg_bit_rate : u4_peak_bit_rate;
+    UWORD32 au4_peak_bit_rate[2];
+
+    const UWORD32 u4_min_bit_rate = 0;
+    const WORD32 i4_is_gop_closed = 1;
+    UWORD32 u4_src_ticks;
+    UWORD32 u4_tgt_ticks;
+
+    /* Same peak for both entries of the [I][P] array */
+    au4_peak_bit_rate[0] = u4_peak;
+    au4_peak_bit_rate[1] = u4_peak;
+
+    /* Frame time computation module */
+    ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, u4_tgt_frm_rate);
+
+    /* Pull down frame rate, seeded with the source frame rate */
+    ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate);
+
+    /* Time stamp structure */
+    ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, u4_src_frm_rate);
+
+    /* Ticks are read back from the frame time context initialized above */
+    u4_src_ticks = ih264e_frame_time_get_src_ticks(pv_frame_time);
+    u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(pv_frame_time);
+
+    /* The caller's max inter frame interval is replaced here: 2 when the
+     * inter frame interval is 1, otherwise inter frame interval + 2 */
+    if(1 == i4_inter_frm_int)
+    {
+        i4_max_inter_frm_int = 2;
+    }
+    else
+    {
+        i4_max_inter_frm_int = i4_inter_frm_int + 2;
+    }
+
+    irc_initialise_rate_control(pv_rc_api,               /* RC handle */
+                                e_rate_control_type,     /* RC algo type */
+                                0,                       /* MB activity off */
+                                u4_avg_bit_rate,         /* Avg bitrate */
+                                au4_peak_bit_rate,       /* Peak bitrate [I][P] */
+                                u4_min_bit_rate,         /* Min bitrate */
+                                u4_src_frm_rate,         /* Src frame rate */
+                                u4_max_delay,            /* Max buffer delay */
+                                u4_intra_frame_interval, /* Intra frame interval */
+                                i4_inter_frm_int,        /* Inter frame interval */
+                                pu1_init_qp,             /* Init QP [I][P][B] */
+                                u4_cpb_size,             /* Max VBV/CPB buffer size */
+                                i4_max_inter_frm_int,    /* Max inter frame interval */
+                                i4_is_gop_closed,        /* Closed GOP */
+                                pu1_min_max_qp,          /* [Imax][Imin][Pmax][Pmin][Bmax][Bmin] */
+                                0,                       /* I-frame estimated sad mode */
+                                u4_src_ticks,            /* LCM(src,tgt frame rate) / src */
+                                u4_tgt_ticks);           /* LCM(src,tgt frame rate) / tgt */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par   Description
+*  Thin wrapper over irc_get_picture_details() that hands back the picture
+*  type (I/P/B) as the return value
+*
+* @param[in] pv_rc_api
+*  Handle to Rate control api
+*
+* @param[in] pi4_pic_id
+*  Forwarded unchanged to irc_get_picture_details() (presumably receives the
+*  picture id - confirm against the rate control library)
+*
+* @param[in] pi4_pic_disp_order_no
+*  Forwarded unchanged to irc_get_picture_details() (presumably receives the
+*  display order number - confirm against the rate control library)
+*
+* @returns
+*  Picture type; P_PIC if the rate control library leaves it untouched
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
+                                            WORD32 *pi4_pic_disp_order_no)
+{
+    /* Start from P_PIC so a defined value is returned in every case */
+    picture_type_e e_pic_type = P_PIC;
+
+    irc_get_picture_details(pv_rc_api, pi4_pic_id, pi4_pic_disp_order_no, &e_pic_type);
+
+    return e_pic_type;
+}
+
+/**
+*******************************************************************************
+*
+* @brief  Function to update the frame rate seen by rate control
+*
+* @par Description
+*  This function is called before queuing the current frame. It advances the
+*  time stamp, decides if the current input buffer should be skipped due to
+*  frame rate mismatch, and updates RC about the achievable frame rate when
+*  the frame is not skipped.
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @returns
+*  Non-zero if the current frame is to be skipped, 0 if it is to be queued
+*
+* @remarks
+*  The skip decision is always requested with a delta time stamp of 1.
+*
+*******************************************************************************
+*/
+WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
+                                  void *ps_time_stamp, void *ps_frame_time)
+{
+    /* Kept at full width so the value matches the WORD32 that is returned */
+    WORD32 i4_skip_src = 0;
+    /* Filled by ih264e_should_src_be_skipped(); not consumed here */
+    UWORD32 u4_src_not_skipped_for_dts = 0;
+
+    /* Update the time stamp for the current frame */
+    ih264e_update_time_stamp(ps_time_stamp);
+
+    /* Check if the source frame needs to be skipped */
+    i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time, 1, &u4_src_not_skipped_for_dts);
+
+    if(i4_skip_src)
+    {
+        /***********************************************************************
+         *Based on difference in source and target frame rate frames are skipped
+         ***********************************************************************/
+        /* Record the skipped frame with a frame rate of 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+    }
+    else
+    {
+        WORD32 i4_avg_frm_rate, i4_source_frame_rate;
+
+        i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+
+        /* Record the frame that is present with the source frame rate */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, i4_source_frame_rate);
+
+        /* Based on the update get the average frame rate */
+        i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);
+
+        /* Call the RC library function to change the frame_rate to the
+         actually achieved frm_rate */
+        irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
+    }
+
+    return (i4_skip_src);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par   Description
+*  Called after a mb is encoded. Adds the mb's distortion, mapped qp and a
+*  count of one to the per-type (intra/inter) totals of the frame info
+*  context; for intra mbs the mb cost is added to the intra cost total as
+*  well.
+*
+* @param[in/out] ps_frame_info
+*  Handle Frame info context; its running totals are incremented
+*
+* @param[in] pv_proc
+*  Process context, passed as an untyped pointer
+*
+* @returns none
+*
+* @remarks
+*  The qp is accumulated after translation through gau1_h264_to_mpeg2_qmap.
+*
+*******************************************************************************
+*/
+void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc)
+{
+    isvce_process_ctxt_t *ps_proc = pv_proc;
+
+    const WORD32 i4_is_intra = (0 != ps_proc->ps_mb_info->u1_is_intra);
+
+    /* Statistics slot: 0 for intra mbs, 1 for inter mbs */
+    const WORD32 i4_slot = i4_is_intra ? 0 : 1;
+
+    ps_frame_info->tot_mb_sad[i4_slot] += ps_proc->i4_mb_distortion;
+    ps_frame_info->qp_sum[i4_slot] += gau1_h264_to_mpeg2_qmap[ps_proc->u1_mb_qp];
+    ps_frame_info->num_mbs[i4_slot]++;
+
+    /* Cost is tracked for intra mbs only */
+    if(i4_is_intra)
+    {
+        ps_frame_info->intra_mb_cost_sum += ps_proc->i4_mb_cost;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+*  Queries the rate control module for the VBV buffer status and converts it
+*  into a pair of encoder buffer overflow/underflow flags
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+*  Total frame bits
+*
+* @param[in] e_pic_type
+*  Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+*  Forwarded unchanged to irc_get_buffer_status()
+*
+* @param[out] pu1_is_enc_buf_overflow
+*  Set to 1 when rate control reports VBV_UNDERFLOW, 0 otherwise
+*
+* @param[out] pu1_is_enc_buf_underflow
+*  Set to 1 when rate control reports VBV_OVERFLOW, 0 otherwise
+*
+* @returns none
+*
+* @remarks
+*  The flags are deliberately crossed with respect to the VBV status: a VBV
+*  overflow is reported as an encoder buffer underflow and vice versa.
+*
+*******************************************************************************
+*/
+void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
+                                picture_type_e e_pic_type,
+                                WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+                                UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow)
+{
+    const vbv_buf_status_e e_status = irc_get_buffer_status(
+        pv_rc_api, i4_total_frame_bits, e_pic_type, pi4_num_bits_to_prevent_vbv_underflow);
+
+    /* Any status other than overflow/underflow leaves both flags at 0 */
+    *pu1_is_enc_buf_underflow = (UWORD8) (VBV_OVERFLOW == e_status);
+    *pu1_is_enc_buf_overflow = (UWORD8) (VBV_UNDERFLOW == e_status);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+*  This function is used to update the rate control module after the current
+*  frame encoding is done with details such as bits consumed, SAD for I/P/B,
+*  intra cost, mb type and others. When the rc type is CBR_NLDRC the buffer
+*  status is also queried to decide on post encode skip and on bit stuffing.
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api context
+*
+* @param[in] ps_frame_info
+*  Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context (not used by this function)
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context (not used by this function)
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context (not used by this function)
+*
+* @param[in] i4_total_mb_in_frame
+*  Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+*  Picture coding type; only the first element is read
+*
+* @param[in] i4_is_first_frame
+*  Is first frame; consulted only when DISABLE_POST_ENC_SKIP is 0
+*
+* @param[out] pi4_is_post_encode_skip
+*  Post encoding skip flag; cleared on entry and set to 1 when the frame is
+*  skipped after encoding
+*
+* @param[in] u1_frame_qp
+*  Frame qp (not used by this function)
+*
+* @param[in/out] pi4_num_intra_in_prev_frame
+*  Number of intra mbs in previous frame. Read for scene change detection,
+*  then updated with the intra mb count of this frame unless the frame was
+*  post encode skipped, and reset to 0 for an I picture
+*
+* @param[out] pi4_avg_activity
+*  Average activity reported by the frame info context
+*
+* @param[out] u1_is_post_enc_skip
+*  Present only when ENABLE_RE_ENC_AS_SKIP is set. Written with 1 when the
+*  encoder buffer overflows on a non-I picture; never cleared here
+*
+* @returns
+*  Number of bytes to be stuffed (bits to stuff >> 3); 0 when no stuffing is
+*  needed
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
+                         void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
+                         WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
+                         WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
+                         UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
+                         WORD32 *pi4_avg_activity
+#if ENABLE_RE_ENC_AS_SKIP
+                         ,
+                         UWORD8 *u1_is_post_enc_skip
+#endif
+)
+{
+    /* Variables for the update_frm_level_info */
+    WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
+    WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
+    WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
+    WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
+    WORD32 i4_total_frame_bits = 0;
+    WORD32 i4_total_hdr_bits = 0;
+    WORD32 i4_total_texturebits;
+    WORD32 i4_avg_mb_activity = 0;
+    WORD32 i4_intra_frm_cost = 0;
+    UWORD8 u1_is_scd = 0;
+    WORD32 i4_cbr_bits_to_stuff = 0;
+    UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame;
+
+    UNUSED(ps_pd_frm_rate);
+    UNUSED(ps_time_stamp);
+    UNUSED(ps_frame_time);
+    UNUSED(u1_frame_qp);
+    UNUSED(i4_is_first_frame);
+    /* Accumulate RC stats from the frame info context: mb counts, qp sums and
+     * SAD per mb type, intra cost, average activity, header and texture bits */
+    ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTRA);
+    ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info, MB_TYPE_INTER);
+    ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTRA);
+    ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info, MB_TYPE_INTER);
+    ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTRA);
+    ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info, MB_TYPE_INTER);
+    i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info);
+    i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info);
+    i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info);
+    i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTRA);
+    i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info, MB_TYPE_INTER);
+    i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits;
+
+    *pi4_avg_activity = i4_avg_mb_activity;
+
+    /* Texture bits are not reported per mb type: the whole texture total
+     * (total bits minus hdr bits) is placed under the inter type and the
+     * intra entry is left at 0 */
+    ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
+    ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
+
+    /* Set post encode skip to zero */
+    pi4_is_post_encode_skip[0] = 0;
+
+    /* For NLDRC, get the buffer status for stuffing or skipping */
+    if(irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC)
+    {
+        WORD32 i4_get_num_bit_to_prevent_vbv_overflow;
+        UWORD8 u1_enc_buf_overflow, u1_enc_buf_underflow;
+
+        /* Getting the buffer status */
+        isvce_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0],
+                                   &i4_get_num_bit_to_prevent_vbv_overflow, &u1_enc_buf_overflow,
+                                   &u1_enc_buf_underflow);
+
+        /* We skip the frame if decoder buffer is underflowing. But we never skip
+         * first I frame */
+#if !DISABLE_POST_ENC_SKIP
+        if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1))
+        // if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
+        {
+            irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e) pe_vop_coding_type[0]);
+            // i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc);
+            i4_total_frame_bits = 0;
+
+            *pi4_is_post_encode_skip = 1;
+
+            /* Adjust the GOP if in case we skipped an I-frame */
+            if(*pe_vop_coding_type == I_PIC) irc_force_I_frame(ps_rate_control_api);
+
+            /* Since this frame is skipped by writing 7 bytes header, we say this is a
+             * P frame */
+            // *pe_vop_coding_type = P;
+
+            /* Getting the buffer status again, to check if it underflows; the
+             * status returned by this call is not used */
+            irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+                                  (picture_type_e) pe_vop_coding_type[0],
+                                  &i4_get_num_bit_to_prevent_vbv_overflow);
+        }
+#endif
+
+#if ENABLE_RE_ENC_AS_SKIP
+        /* Check for VBV constraints - post encode skip. All intra statistics
+         * are folded into the inter entries and the bit counts are zeroed */
+        if(u1_enc_buf_overflow == 1 && (pe_vop_coding_type[0] != I_PIC))
+        {
+            *u1_is_post_enc_skip = 1;
+
+            ai4_tot_mb_in_type[MB_TYPE_INTER] += ai4_tot_mb_in_type[MB_TYPE_INTRA];
+            ai4_tot_mb_in_type[MB_TYPE_INTRA] = 0;
+            ai4_tot_mb_type_qp[MB_TYPE_INTER] += ai4_tot_mb_type_qp[MB_TYPE_INTRA];
+            ai4_tot_mb_type_qp[MB_TYPE_INTRA] = 0;
+
+            ai4_mb_type_sad[MB_TYPE_INTER] += ai4_mb_type_sad[MB_TYPE_INTRA];
+            ai4_mb_type_sad[MB_TYPE_INTRA] = 0;
+
+            i4_intra_frm_cost = 0;
+
+            i4_total_hdr_bits = 0;
+            i4_total_texturebits = 0;
+            i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits;
+
+            ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
+            ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
+
+            /* Getting the buffer status again, to check if it underflows; the
+             * status returned by this call is not used */
+            irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+                                  (picture_type_e) pe_vop_coding_type[0],
+                                  &i4_get_num_bit_to_prevent_vbv_overflow);
+        }
+#endif
+
+        /* In this case we stuff bytes as buffer is overflowing */
+        if(u1_enc_buf_underflow == 1)
+        {
+            /* The stuffing function is directly pulled out from split controller
+               workspace. encode_vop_data() function makes sure alignment data is
+               dumped at the end of a frame. Split controller was identifying this
+               alignment byte, overwriting it with the stuff data and then finally
+               aligning the buffer. Here every thing is inside the DSP. So, ideally
+               encode_vop_data needn't align, and we can start stuffing directly. But
+               in that case, it'll break the logic for a normal frame. Hence for
+               simplicity, not changing this part since it is ok to align and then
+               overwrite since stuffing is not done for every frame */
+            i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits,
+                                                         pe_vop_coding_type[0]);
+
+            /* Just add extra 32 bits to make sure we don't stuff lesser */
+            i4_cbr_bits_to_stuff += 32;
+
+            /* We can not stuff more than the outbuf size. So have a check here */
+            /* Add stuffed bits to total bits */
+            i4_total_frame_bits += i4_cbr_bits_to_stuff;
+        }
+    }
+
+    /* Assume a scene change when, in a P picture, the number of intra MBs is
+     * more than 2/3rd of total MBs and also more than 11/10th of the intra MB
+     * count carried over from the previous frame */
+    if((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) &&
+       (*pe_vop_coding_type == P_PIC) &&
+       (ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32) u4_num_intra_in_prev_frame) / 10)))
+    {
+        u1_is_scd = 1;
+    }
+
+    /* Update num intra mbs of this frame, unless it was post encode skipped */
+    if(pi4_is_post_encode_skip[0] == 0)
+    {
+        *pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA];
+    }
+
+    /* Reset intra count to zero when an I frame is encountered */
+    if(*pe_vop_coding_type == I_PIC)
+    {
+        *pi4_num_intra_in_prev_frame = 0;
+    }
+
+    /* Do an update of rate control after post encode */
+    irc_update_frame_level_info(ps_rate_control_api,        /* RC state */
+                                pe_vop_coding_type[0],      /* PIC type */
+                                ai4_mb_type_sad,            /* SAD for [Intra/Inter] */
+                                i4_total_frame_bits,        /* Total frame bits */
+                                i4_total_hdr_bits,          /* header bits for */
+                                ai4_mb_type_tex_bits,       /* for MB[Intra/Inter] */
+                                ai4_tot_mb_type_qp,         /* for MB[Intra/Inter] */
+                                ai4_tot_mb_in_type,         /* for MB[Intra/Inter] */
+                                i4_avg_mb_activity,         /* Average mb activity in frame */
+                                u1_is_scd,                  /* Is a scene change detected */
+                                0,                          /* Pre encode skip  */
+                                (WORD32) i4_intra_frm_cost, /* Intra cost for frame */
+                                0);                         /* Not done outside */
+
+    return (i4_cbr_bits_to_stuff >> 3);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+*  Adds the header and residue bit counts held in the entropy context to the
+*  header and texture bit totals of the frame info context, for the intra
+*  and the inter mb type
+*
+* @param[in/out] ps_frame_info
+*  Frame info context; its bit totals are incremented
+*
+* @param[in] pv_entropy
+*  Entropy context, passed as an untyped pointer
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy)
+{
+    /* Intra first, then inter */
+    static const WORD32 ai4_mb_types[2] = {MB_TYPE_INTRA, MB_TYPE_INTER};
+
+    isvce_entropy_ctxt_t *ps_entropy = pv_entropy;
+    WORD32 i;
+
+    for(i = 0; i < 2; i++)
+    {
+        const WORD32 i4_type = ai4_mb_types[i];
+
+        ps_frame_info->mb_header_bits[i4_type] += ps_entropy->u4_header_bits[i4_type];
+        ps_frame_info->mb_texture_bits[i4_type] += ps_entropy->u4_residue_bits[i4_type];
+    }
+}
--- a/encoder/svc/isvce_rate_control.h
+++ b/encoder/svc/isvce_rate_control.h
@ -0,0 +1,330 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_rate_control.h
+*
+* @brief
+*  This file contains function declarations of api functions for h264 rate
+*  control
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_RATE_CONTROL_H_
+#define _ISVCE_RATE_CONTROL_H_
+
+#if ENABLE_RE_ENC_AS_SKIP
+#include "isvce_structs.h"
+#endif
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function initializes rate control context and variables
+*
+* @par Description
+*  This function initializes rate control type, source and target frame rate,
+*  average and peak bitrate, intra-inter frame interval and initial
+*  quantization parameter
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api
+*
+* @param[in] pv_frame_time
+*  Handle to frame time context
+*
+* @param[in] pv_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+*  Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @param[in] e_rate_control_type
+*  Rate control type
+*
+* @param[in] u4_avg_bit_rate
+*  Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+*  Peak bit rate; the average bit rate is used instead when the rate control
+*  type is CBR_NLDRC
+*
+* @param[in] u4_max_delay
+*  Maximum delay between frames
+*
+* @param[in] u4_intra_frame_interval
+*  Intra frame interval
+*
+* @param[in] i4_inter_frm_int
+*  Inter frame interval
+*
+* @param[in] pu1_init_qp
+*  Initial qp
+*
+* @param[in] i4_max_inter_frm_int
+*  Maximum inter frame interval. The implementation overwrites this value
+*  with one derived from i4_inter_frm_int, so the value passed in has no
+*  effect
+*
+* @param[in] pu1_min_max_qp
+*  Array of min/max qp
+*
+* @param[in] u1_profile_level
+*  Encoder profile level
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void isvce_rc_init(void *pv_rc_api, void *pv_frame_time, void *pv_time_stamp, void *pv_pd_frm_rate,
+                   UWORD32 u4_max_frm_rate, UWORD32 u4_src_frm_rate, UWORD32 u4_tgt_frm_rate,
+                   rc_type_e e_rate_control_type, UWORD32 u4_avg_bit_rate, UWORD32 u4_peak_bit_rate,
+                   UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, WORD32 i4_inter_frm_int,
+                   UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp,
+                   UWORD8 u1_profile_level);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par   Description
+*  This function returns the Picture type(I/P/B)
+*
+* @param[in] pv_rc_api
+*  Handle to Rate control api
+*
+* @param[in] pi4_pic_id
+*  Passed through to the rate control library (presumably receives the
+*  picture id - confirm against the rate control library)
+*
+* @param[in] pi4_pic_disp_order_no
+*  Passed through to the rate control library (presumably receives the
+*  display order number - confirm against the rate control library)
+*
+* @returns
+*  Picture type
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e isvce_rc_get_picture_details(void *pv_rc_api, WORD32 *pi4_pic_id,
+                                            WORD32 *pi4_pic_disp_order_no);
+
+/**
+*******************************************************************************
+*
+* @brief  Function to set frame rate inside RC.
+*
+* @par Description
+*  This function is called before queuing the current frame. It updates the
+*  time stamp, decides whether the current input should be skipped due to
+*  frame rate mismatch and, when the frame is not skipped, informs the rate
+*  control module of the average achieved frame rate
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @returns
+*  Skip (non-zero) or encode (0) the current frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 isvce_update_rc_framerates(void *ps_rate_control_api, void *ps_pd_frm_rate,
+                                  void *ps_time_stamp, void *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par   Description
+*  After encoding a mb, information such as mb type, qp used, mb distortion
+*  resulted in encoding the block and so on needs to be preserved for modelling
+*  RC. This is preserved via this function call.
+*
+* @param[in/out] ps_frame_info
+*  Handle Frame info context; its per mb type totals are incremented
+*
+* @param[in] pv_proc
+*  Process context, passed as an untyped pointer
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void isvce_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+*  This function is used to get buffer status(underflow/overflow) by rate
+*  control module
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+*  Total frame bits
+*
+* @param[in] e_pic_type
+*  Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+*  Number of bits to prevent underflow; passed through to the rate control
+*  library
+*
+* @param[out] pu1_is_enc_buf_overflow
+*  Buffer overflow indication flag; set when rate control reports
+*  VBV_UNDERFLOW
+*
+* @param[out] pu1_is_enc_buf_underflow
+*  Buffer underflow indication flag; set when rate control reports
+*  VBV_OVERFLOW
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void isvce_rc_get_buffer_status(void *pv_rc_api, WORD32 i4_total_frame_bits,
+                                picture_type_e e_pic_type,
+                                WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+                                UWORD8 *pu1_is_enc_buf_overflow, UWORD8 *pu1_is_enc_buf_underflow);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+*  This function is used to update the rate control module after the current
+*  frame encoding is done with details such as bits consumed, SAD for I/P/B,
+*  intra cost, mb type and others
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api context
+*
+* @param[in] ps_frame_info
+*  Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+*  Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+*  Picture coding type
+*
+* @param[in] i4_is_first_frame
+*  Is first frame
+*
+* @param[out] pi4_is_post_encode_skip
+*  Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+*  Frame qp
+*
+* @param[in/out] pi4_num_intra_in_prev_frame
+*  Number of intra mbs in previous frame; updated for use with the next frame
+*
+* @param[out] pi4_avg_activity
+*  Average activity
+*
+* @param[out] u1_is_post_enc_skip
+*  Part of the signature only when ENABLE_RE_ENC_AS_SKIP is set; flag written
+*  with 1 when the frame is to be treated as skipped
+*
+* @returns
+*  Number of bytes to be stuffed
+*
+* @remarks
+*
+*******************************************************************************
+*/
+#if ENABLE_RE_ENC_AS_SKIP
+WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
+                         void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
+                         WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
+                         WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
+                         UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
+                         WORD32 *pi4_avg_activity, UWORD8 *u1_is_post_enc_skip);
+#else
+WORD32 isvce_rc_post_enc(void *ps_rate_control_api, frame_info_t *ps_frame_info,
+                         void *ps_pd_frm_rate, void *ps_time_stamp, void *ps_frame_time,
+                         WORD32 i4_total_mb_in_frame, picture_type_e *pe_vop_coding_type,
+                         WORD32 i4_is_first_frame, WORD32 *pi4_is_post_encode_skip,
+                         UWORD8 u1_frame_qp, WORD32 *pi4_num_intra_in_prev_frame,
+                         WORD32 *pi4_avg_activity);
+
+#endif
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+*  Adds the header and residue bits recorded in the entropy context to the
+*  header and texture bit totals of the frame info context
+*
+* @param[in/out] ps_frame_info
+*  Frame info context; its bit totals are incremented
+*
+* @param[in] pv_entropy
+*  Entropy context, passed as an untyped pointer
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void isvce_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);
+
+#endif
--- a/encoder/svc/isvce_rc_mem_interface.c
+++ b/encoder/svc/isvce_rc_mem_interface.c
@ -0,0 +1,325 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_rc_mem_interface.c
+*
+* @brief
+*  This file contains api function definitions for rate control memtabs
+*
+* @author
+*  ittiam
+*
+* List of Functions
+*  - isvce_map_rc_mem_recs_to_itt_api()
+*  - isvce_map_itt_mem_rec_to_rc_mem_rec()
+*  - isvce_get_rate_control_mem_tab()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "isvce.h"
+#include "ithread.h"
+#include "isvc_defs.h"
+#include "ih264_debug.h"
+#include "isvc_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "isvc_structs.h"
+#include "isvc_trans_quant_itrans_iquant.h"
+#include "isvc_inter_pred_filters.h"
+#include "isvc_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "isvc_common_tables.h"
+#include "ih264_list.h"
+#include "isvc_cabac_tables.h"
+#include "ih264e_error.h"
+#include "isvce_defs.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "isvce_rate_control.h"
+#include "isvce_cabac_structs.h"
+#include "isvce_structs.h"
+#include "ih264e_master.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "isvce_utils.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_fmt_conv.h"
+#include "isvce_cavlc.h"
+#include "ih264e_rc_mem_interface.h"
+#include "isvce_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_common.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "ih264e_modify_frm_rate.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Copies the requirements held in rc mem records to the encoder lib mem
+*  records
+*
+* @par   Description
+*  For each of the num_mem_recs entries, the size and alignment stored in the
+*  rc mem record are handed to FILL_MEMTAB for the encoder lib mem record at
+*  the same index. The memory type requested is always
+*  IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM.
+*
+* @param[out]  ps_mem
+*  pointer to encoder lib mem records; entries 0 .. num_mem_recs - 1 are filled
+*
+* @param[in]   rc_memtab
+*  pointer to rc mem records; entries 0 .. num_mem_recs - 1 are read
+*
+* @param[in]   num_mem_recs
+*  number of memory records
+*
+* @return      void
+*
+******************************************************************************
+*/
+static void isvce_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab,
+                                             UWORD32 num_mem_recs)
+{
+    UWORD32 u4_rec_idx;
+
+    for(u4_rec_idx = 0; u4_rec_idx < num_mem_recs; u4_rec_idx++)
+    {
+        const itt_memtab_t *ps_rc_rec = &rc_memtab[u4_rec_idx];
+        UWORD32 u4_rec_size = ps_rc_rec->u4_size;
+        UWORD32 u4_rec_align = ps_rc_rec->i4_alignment;
+
+        /* every record is requested as external persistent cacheable memory */
+        FILL_MEMTAB(ps_mem, u4_rec_idx, u4_rec_size, u4_rec_align,
+                    IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Copies encoder lib mem records in to RC memory records
+*
+* @par   Description
+*  For each of the num_mem_recs entries, the base pointer, size and alignment
+*  of the encoder lib mem record are copied to the rc mem record at the same
+*  index. The rc record is always tagged as DDR memory with PERSISTENT usage.
+*
+* @param[in] ps_mem
+*  pointer to encoder lib mem records; entries 0 .. num_mem_recs - 1 are read
+*
+* @param[out] rc_memtab
+*  pointer to rc mem records; entries 0 .. num_mem_recs - 1 are written
+*
+* @param[in] num_mem_recs
+*  Number of memory records
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+static void isvce_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem, itt_memtab_t *rc_memtab,
+                                                UWORD32 num_mem_recs)
+{
+    UWORD32 u4_rec_idx;
+
+    for(u4_rec_idx = 0; u4_rec_idx < num_mem_recs; u4_rec_idx++)
+    {
+        const iv_mem_rec_t *ps_src = &ps_mem[u4_rec_idx];
+        itt_memtab_t *ps_dst = &rc_memtab[u4_rec_idx];
+
+        ps_dst->pv_base = ps_src->pv_base;
+        ps_dst->u4_size = ps_src->u4_mem_size;
+        ps_dst->i4_alignment = ps_src->u4_mem_alignment;
+
+        /* DDR is the only memory region on offer */
+        ps_dst->e_usage = PERSISTENT;
+        ps_dst->e_mem_region = DDR;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief Get memtabs for rate control
+*
+* @par   Description
+*  This routine is used to Get/init memtabs for rate control. It visits six
+*  modules in a fixed order - frame time, time stamp, frame rate and the three
+*  rate control api instances (RC_MEM_API_L0, RC_MEM_API_L1, RC_MEM_API_L2) -
+*  and performs the same four steps for each of them:
+*   1. query the number of memtabs the module uses (GET_NUM_MEMTAB),
+*   2. copy that many encoder lib mem records, starting at ps_mem[j], in to
+*      the local rc memtab array,
+*   3. call the module again with e_func_type and those rc memtabs,
+*   4. map the rc memtabs back on to the encoder lib mem records.
+*  j is then advanced by the count returned in step 3, so every module works
+*  on the records that follow those of the previous module.
+*
+* @param[in] pv_rate_control
+*  pointer to rate control context (handle). Its members are referenced only
+*  when e_func_type is USE_BASE or FILL_BASE
+*
+* @param[in,out] ps_mem
+*  pointer to encoder lib mem records
+*
+* @param[in] e_func_type
+*  enum that dictates fill memory records or Init memory records
+*
+* @return total number of mem records
+*
+* @remarks
+*  For any e_func_type other than USE_BASE and FILL_BASE the handle pointers
+*  given to the modules refer to local placeholder slots (refptr2) and not to
+*  the rate control context.
+*
+******************************************************************************
+*/
+WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
+                                      ITT_FUNC_TYPE_E e_func_type)
+{
+    itt_memtab_t as_itt_memtab[NUM_SVCE_RC_MEMTABS];
+    WORD32 i4_num_memtab = 0, j = 0;
+    void *refptr2[RC_MEM_CNT];
+    void **refptr1[RC_MEM_CNT];
+    isvce_rate_control_ctxt_t *ps_rate_control = pv_rate_control;
+
+    for(j = 0; j < RC_MEM_CNT; j++) refptr1[j] = &(refptr2[j]);
+
+    j = 0;
+
+    if(e_func_type == USE_BASE || e_func_type == FILL_BASE)
+    {
+        refptr1[RC_MEM_FRAME_TIME] = &ps_rate_control->pps_frame_time;
+        refptr1[RC_MEM_TIME_STAMP] = &ps_rate_control->pps_time_stamp;
+        refptr1[RC_MEM_FRAME_RATE] = &ps_rate_control->pps_pd_frm_rate;
+        refptr1[RC_MEM_API_L0] = &ps_rate_control->apps_rate_control_api[0];
+        refptr1[RC_MEM_API_L1] = &ps_rate_control->apps_rate_control_api[1];
+        refptr1[RC_MEM_API_L2] = &ps_rate_control->apps_rate_control_api[2];
+    }
+
+    /* Get the total number of memtabs used by Frame time Module */
+    i4_num_memtab = ih264e_frame_time_get_init_free_memtab(
+        (frame_time_t **) refptr1[RC_MEM_FRAME_TIME], NULL, GET_NUM_MEMTAB);
+    /* Few extra steps during init: seed the rc memtabs with the alignment, size and
+     * base pointer held in the encoder lib mem records */
+    isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    /* Fill the memtabs used by Frame time Module; the count returned here replaces
+     * the one queried above */
+    i4_num_memtab = ih264e_frame_time_get_init_free_memtab(
+        (frame_time_t **) refptr1[RC_MEM_FRAME_TIME], as_itt_memtab + j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Time stamp Module */
+    i4_num_memtab = ih264e_time_stamp_get_init_free_memtab(
+        (time_stamp_t **) refptr1[RC_MEM_TIME_STAMP], NULL, GET_NUM_MEMTAB);
+    /* Few extra steps during init (same seeding as for the Frame time Module) */
+    isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    /* Fill the memtabs used by Time Stamp Module */
+    i4_num_memtab = ih264e_time_stamp_get_init_free_memtab(
+        (time_stamp_t **) refptr1[RC_MEM_TIME_STAMP], as_itt_memtab + j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Frame rate Module */
+    i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab(
+        (pd_frm_rate_t **) refptr1[RC_MEM_FRAME_RATE], NULL, GET_NUM_MEMTAB);
+    /* Few extra steps during init (same seeding as for the Frame time Module) */
+    isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    /* Fill the memtabs used by Frame Rate Module */
+    i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab(
+        (pd_frm_rate_t **) refptr1[RC_MEM_FRAME_RATE], as_itt_memtab + j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Rate Controller instance 0 (RC_MEM_API_L0) */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
+        (rate_control_api_t **) refptr1[RC_MEM_API_L0], NULL, GET_NUM_MEMTAB);
+    /* Few extra steps during init (same seeding as for the Frame time Module) */
+    isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    /* Fill the memtabs used by Rate Controller instance 0 */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
+        (rate_control_api_t **) refptr1[RC_MEM_API_L0], as_itt_memtab + j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Rate Controller instance 1 (RC_MEM_API_L1) */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
+        (rate_control_api_t **) refptr1[RC_MEM_API_L1], NULL, GET_NUM_MEMTAB);
+    /* Few extra steps during init (same seeding as for the Frame time Module) */
+    isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    /* Fill the memtabs used by Rate Controller instance 1 */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
+        (rate_control_api_t **) refptr1[RC_MEM_API_L1], as_itt_memtab + j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Rate Controller instance 2 (RC_MEM_API_L2) */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
+        (rate_control_api_t **) refptr1[RC_MEM_API_L2], NULL, GET_NUM_MEMTAB);
+    /* Few extra steps during init (same seeding as for the Frame time Module) */
+    isvce_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    /* Fill the memtabs used by Rate Controller instance 2 */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab(
+        (rate_control_api_t **) refptr1[RC_MEM_API_L2], as_itt_memtab + j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    isvce_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab + j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    return j; /* Total MemTabs Needed by Rate Control Module */
+}
--- a/encoder/svc/isvce_rc_mem_interface.h
+++ b/encoder/svc/isvce_rc_mem_interface.h
@ -0,0 +1,77 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+******************************************************************************
+* @file
+*  isvce_rc_mem_interface.h
+*
+* @brief
+*  This file contains function declaration and structures for rate control
+*  memtabs
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  The rate control library is a global library across various codecs. It
+*  anticipates certain structures definitions. Those definitions are to be
+*  imported from global workspace. Instead of that, the structures needed for
+*  rc library are copied in to this file and exported to rc library. If the
+*  structures / enums / ... in the global workspace change, this file also needs
+*  to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef _ISVCE_RC_MEM_INTERFACE_H_
+#define _ISVCE_RC_MEM_INTERFACE_H_
+
+#include "ih264e_rc_mem_interface.h"
+
+/**
+ ***************************************************************************
+ * Enum to hold mem records in RC
+ *
+ * Each value indexes one module handle slot in
+ * isvce_get_rate_control_mem_tab(); RC_MEM_CNT sizes the slot arrays
+ ****************************************************************************
+ */
+typedef enum RC_MEM_TYPES_T
+{
+    RC_MEM_FRAME_TIME, /* frame time module handle */
+
+    RC_MEM_TIME_STAMP, /* time stamp module handle */
+
+    RC_MEM_FRAME_RATE, /* frame rate (pd_frm_rate) module handle */
+
+    RC_MEM_API_L0, /* rate control api handle, index 0 */
+
+    RC_MEM_API_L1, /* rate control api handle, index 1 */
+
+    RC_MEM_API_L2, /* rate control api handle, index 2 */
+
+    RC_MEM_CNT /* number of slots; has to stay the last enumerator */
+
+    /*
+     * Do not add anything below
+     */
+} RC_MEM_TYPES_T;
+
+/**
+ ***************************************************************************
+ * Get/init memtabs for rate control
+ *
+ * pv_rate_control : pointer to rate control context (handle)
+ * ps_mem          : pointer to encoder lib mem records
+ * e_func_type     : enum that dictates fill memory records or init memory
+ *                   records
+ *
+ * Returns the total number of mem records used by rate control
+ ****************************************************************************
+ */
+extern WORD32 isvce_get_rate_control_mem_tab(void *pv_rate_control, iv_mem_rec_t *ps_mem,
+                                             ITT_FUNC_TYPE_E e_func_type);
+
+#endif
--- a/encoder/svc/isvce_rc_utils.c
+++ b/encoder/svc/isvce_rc_utils.c
@ -0,0 +1,286 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_rc_utils.c
+*
+* @brief
+*  Contains get gpp function required by the SVC encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_get_gpp()
+*  - isvce_rc_utils_init()
+*  - isvce_get_rc_utils_data_size()
+*  - isvce_compute_gpp()
+*  - isvce_get_gpp_function_selector()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "isvc_structs.h"
+#include "isvce_rc_utils.h"
+#include "isvce_rc_utils_private_defs.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*   get gpp function
+*
+* @par Description:
+*   computes gradient per pixel value for a given frame. For every sample that
+*   is visited, the absolute differences to its bottom and right neighbours
+*   are accumulated per component. Each sum is divided by the sample count of
+*   its plane and the three results are blended as (4 * Y + U + V) / 6.
+*
+* @param[in] ps_input_buf
+*  pointer to yuv buffer properties. Component buffer 0 is read as luma;
+*  component buffer 1 is read as byte-interleaved chroma, where even offsets
+*  feed the U sum and odd offsets feed the V sum
+*
+* @returns
+*  calculated gpp value; 0 when the frame is less than 2 samples wide or high
+*
+* @remarks
+*  Width and height are unsigned, so the 'dimension - 1' and 'dimension - 2'
+*  loop bounds would wrap around for frames smaller than 2x2 and the loops
+*  would then read far outside the buffers. Such frames are rejected up front;
+*  they also have a chroma sample count of zero to normalise by.
+*
+*******************************************************************************
+*/
+
+static DOUBLE isvce_get_gpp(yuv_buf_props_t *ps_input_buf)
+{
+    UWORD32 i, j;
+
+    DOUBLE d_gpp_y = 0;
+    DOUBLE d_gpp_u = 0;
+    DOUBLE d_gpp_v = 0;
+
+    DOUBLE d_gpp = 0;
+
+    UWORD32 u4_width = ps_input_buf->u4_width;
+    UWORD32 u4_height = ps_input_buf->u4_height;
+
+    UWORD8 *pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[0].pv_data;
+    WORD32 i4_input_stride = ps_input_buf->as_component_bufs[0].i4_data_stride;
+
+    /* No sample has both neighbours, and the unsigned loop bounds below would wrap */
+    if((u4_width < 2) || (u4_height < 2))
+    {
+        return 0;
+    }
+
+    /* Luma: the last row and the last column have no bottom / right neighbour */
+    for(i = 0; i < u4_height - 1; i++)
+    {
+        for(j = 0; j < u4_width - 1; j++)
+        {
+            UWORD8 u1_cur_pix = pu1_input_buf[j];
+            UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j];
+            UWORD8 u1_right_pix = pu1_input_buf[j + 1];
+
+            d_gpp_y += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
+        }
+        pu1_input_buf += i4_input_stride;
+    }
+
+    pu1_input_buf = (UWORD8 *) ps_input_buf->as_component_bufs[1].pv_data;
+    i4_input_stride = ps_input_buf->as_component_bufs[1].i4_data_stride;
+
+    /* Chroma: samples of one component are 2 bytes apart, so the right neighbour */
+    /* of the sample at offset j is at offset j + 2 */
+    for(i = 0; i < (u4_height >> 1) - 1; i++)
+    {
+        for(j = 0; j < u4_width - 2; j += 2)
+        {
+            UWORD8 u1_cur_pix = pu1_input_buf[j];
+            UWORD8 u1_bot_pix = pu1_input_buf[i4_input_stride + j];
+            UWORD8 u1_right_pix = pu1_input_buf[j + 2];
+
+            d_gpp_u += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
+
+            u1_cur_pix = pu1_input_buf[j + 1];
+            u1_bot_pix = pu1_input_buf[i4_input_stride + j + 1];
+            u1_right_pix = pu1_input_buf[j + 2 + 1];
+
+            d_gpp_v += (ABS(u1_cur_pix - u1_bot_pix) + ABS(u1_cur_pix - u1_right_pix));
+        }
+        pu1_input_buf += i4_input_stride;
+    }
+
+    /* Sample counts are formed in floating point so that the product cannot wrap */
+    d_gpp_y /= ((DOUBLE) u4_width * u4_height);
+    d_gpp_u /= ((DOUBLE) (u4_width >> 1) * (u4_height >> 1));
+    d_gpp_v /= ((DOUBLE) (u4_width >> 1) * (u4_height >> 1));
+
+    d_gpp = (DOUBLE) ((4 * d_gpp_y) + d_gpp_u + d_gpp_v) / 6;
+
+    return d_gpp;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   gets the memory size required for compute gpp
+*
+* @par Description:
+*   returns the memory required by the rc utils state struct for allocation.
+*
+* @returns
+*   size, in bytes, of svc_rc_utils_state_t
+*
+* @remarks
+*   declared with (void) so that the definition is a proper prototype and
+*   calls with arguments are diagnosed
+*
+*******************************************************************************
+*/
+
+UWORD32 isvce_get_rc_utils_data_size(void) { return sizeof(svc_rc_utils_state_t); }
+
+/**
+*******************************************************************************
+*
+* @brief
+*   compute gpp process
+*
+* @par Description:
+*   forwards the input buffer to the get gpp function stored in the rc utils
+*   state and hands back its result
+*
+* @param[in] ps_svc_rc_utils_ctxt
+*  pointer to svc rc utils context; its pv_rc_utils_state is read as a
+*  svc_rc_utils_state_t
+*
+* @param[in] ps_input_buf
+*  pointer to yuv buffer properties
+*
+* @returns
+*  calculated gpp value
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, yuv_buf_props_t *ps_input_buf)
+{
+    svc_rc_utils_state_t *ps_state;
+    DOUBLE d_gpp;
+
+    ps_state = (svc_rc_utils_state_t *) ps_svc_rc_utils_ctxt->pv_rc_utils_state;
+    d_gpp = ps_state->pf_get_gpp(ps_input_buf);
+
+    return d_gpp;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   selects which function to call for get gpp based on e_arch
+*
+* @par Description:
+*   stores the get gpp function for the requested architecture in the rc utils
+*   state. Which architecture cases exist is decided at compile time (X86,
+*   ARMV8, DISABLE_NEON); every other e_arch value gets the C implementation.
+*
+* @param[in] ps_rc_utils_state
+*  pointer to svc rc utils state; its pf_get_gpp member is written
+*
+* @param[in] e_arch
+*  architecture type
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+static void isvce_get_gpp_function_selector(svc_rc_utils_state_t *ps_rc_utils_state,
+                                            IV_ARCH_T e_arch)
+{
+    /* Start from the C routine; an architecture specific case below may replace it */
+    ps_rc_utils_state->pf_get_gpp = isvce_get_gpp;
+
+    switch(e_arch)
+    {
+#if defined(X86)
+        case ARCH_X86_SSE42:
+            ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_sse42;
+            break;
+#elif defined(ARMV8)
+        case ARCH_ARM_A53:
+        case ARCH_ARM_A57:
+        case ARCH_ARM_V8_NEON:
+            ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon;
+            break;
+#elif !defined(DISABLE_NEON)
+        case ARCH_ARM_A9Q:
+        case ARCH_ARM_A9A:
+        case ARCH_ARM_A9:
+        case ARCH_ARM_A7:
+        case ARCH_ARM_A5:
+        case ARCH_ARM_A15:
+            ps_rc_utils_state->pf_get_gpp = isvce_get_gpp_neon;
+            break;
+#endif
+        default:
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   initializes the rc utils context
+*
+* @par Description:
+*   treats the base of the supplied memory record as the rc utils state,
+*   selects the get gpp function for e_arch in that state and stores the state
+*   pointer in the context
+*
+* @param[in] ps_svc_rc_utils_ctxt
+*   pointer to svc rc utils context; its pv_rc_utils_state is set
+*
+* @param[in] ps_mem_rec
+*   pointer to memory allocated to compute gpp process
+*
+* @param[in] e_arch
+*   architecture type
+*
+* @returns
+*   none
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec,
+                         IV_ARCH_T e_arch)
+{
+    /* the state struct sits at the very start of the memory record */
+    svc_rc_utils_state_t *ps_state = (svc_rc_utils_state_t *) ps_mem_rec->pv_base;
+
+    isvce_get_gpp_function_selector(ps_state, e_arch);
+
+    ps_svc_rc_utils_ctxt->pv_rc_utils_state = ps_state;
+}
--- a/encoder/svc/isvce_rc_utils.h
+++ b/encoder/svc/isvce_rc_utils.h
@ -0,0 +1,134 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+/**
+*******************************************************************************
+* @file
+*  isvce_rc_utils.h
+*
+* @brief
+*  Contains get gpp function required by the SVC encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - isvce_rc_utils_init()
+*  - isvce_get_rc_utils_data_size()
+*  - isvce_compute_gpp()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _ISVCE_RC_UTILS_H_
+#define _ISVCE_RC_UTILS_H_
+
+#include "ih264_typedefs.h"
+#include "isvc_structs.h"
+
+typedef struct
+{
+    /**
+     * pointer to the state of rc utils; this is the only member of the context.
+     * isvce_rc_utils_init() sets it to the base of the memory record it is given and
+     * isvce_compute_gpp() reads it back as a svc_rc_utils_state_t pointer
+     */
+    void *pv_rc_utils_state;
+
+} svc_rc_utils_ctxt_t;
+
+/**
+*******************************************************************************
+*
+* @brief
+*   initializes the rc utils context
+*
+* @par Description:
+*   initializes the rc utils context: the state is placed at the base of the
+*   supplied memory record, the context is pointed at it and the get gpp
+*   function matching e_arch is selected
+*
+* @param[in] ps_svc_rc_utils_ctxt
+*   pointer to svc rc utils context; its pv_rc_utils_state is set by this call
+*
+* @param[in] ps_mem_rec
+*   pointer to memory allocated to compute gpp process (presumably sized with
+*   isvce_get_rc_utils_data_size() - confirm at the allocation site)
+*
+* @param[in] e_arch
+*   architecture type
+*
+* @returns
+*   none
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+extern void isvce_rc_utils_init(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt, iv_mem_rec_t *ps_mem_rec,
+                                IV_ARCH_T e_arch);
+
+/**
+*******************************************************************************
+*
+* @brief
+*   gets the memory size required for compute gpp
+*
+* @par Description:
+*   returns the memory required by the rc utils state struct for allocation.
+*
+* @returns
+*   size, in bytes, of the rc utils state
+*
+* @remarks
+*   declared with (void) so that the declaration is a proper prototype and
+*   calls with arguments are diagnosed
+*
+*******************************************************************************
+*/
+
+extern UWORD32 isvce_get_rc_utils_data_size(void);
+
+/**
+*******************************************************************************
+*
+* @brief
+*   compute gpp process
+*
+* @par Description:
+*   calls the function to compute gpp, i.e. the get gpp function that
+*   isvce_rc_utils_init() selected for the architecture it was given
+*
+* @param[in] ps_svc_rc_utils_ctxt
+*  pointer to svc rc utils context; has to be initialized with
+*  isvce_rc_utils_init() first, as its state pointer is dereferenced
+*
+* @param[in] ps_input_buf
+*  pointer to yuv buffer properties
+*
+* @returns
+*  calculated gpp value
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+extern DOUBLE isvce_compute_gpp(svc_rc_utils_ctxt_t *ps_svc_rc_utils_ctxt,
+                                yuv_buf_props_t *ps_input_buf);
+
+#endif
--- a/Show more
+++ b/Show more