encoder: Fix out-of-bounds access of prediction buffer

While calculating the residual for an inter 4x4 MB, the intrinsic
instruction reads 4 extra bytes from the prediction buffer.

Test: POC in the bug description

Bug: 204704614

Change-Id: I72b5cb8b63351efb60b65ecbb5e7a8c8bc1fcd94
(cherry picked from commit c79d0f5092)
Merged-In: I72b5cb8b63351efb60b65ecbb5e7a8c8bc1fcd94
This commit is contained in:
Neelkamal Semwal 2021-11-09 12:07:44 +05:30 committed by Cherrypicker Worker
parent ad8d57b854
commit 27fbb43bd6
3 changed files with 23 additions and 8 deletions

View file

@ -227,10 +227,10 @@ void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src,
//Transform ends -- horizontal transform
//Load pred buffer
pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits
pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits
pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits
pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits
pred_r0 = _mm_cvtepu8_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits
pred_r1 = _mm_cvtepu8_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits

View file

@ -41,6 +41,20 @@
#include <stdint.h>
#include <immintrin.h>
#ifndef __ANDROID__
/*
 * Load one `int` from an arbitrarily aligned address into the lowest
 * 32-bit lane of an SSE register; the remaining three lanes are zero.
 *
 * Only sizeof(int) bytes are read from __a, so the load does not touch
 * memory past those bytes.
 */
static __inline__ __m128i
loadu_32(void const *__a)
{
    /*
     * packed      -> the struct has alignment 1, so any address is valid;
     * may_alias   -> the access is exempt from strict-aliasing rules.
     */
    struct unaligned_i32 {
        int value;
    } __attribute__((__packed__, __may_alias__));

    /* Keep the const qualifier: the source memory is only read. */
    const struct unaligned_i32 *src = (const struct unaligned_i32 *)__a;

    /* Lane 0 receives the loaded value, lanes 1..3 are cleared. */
    return _mm_cvtsi32_si128(src->value);
}
#else
/*
 * Variant compiled when __ANDROID__ is defined (the #else arm of the
 * surrounding `#ifndef __ANDROID__`): forwards to the _mm_loadu_si32
 * intrinsic, which loads an unaligned 32-bit integer into the lowest
 * lane of the result and zeroes the upper lanes.
 */
static __inline__ __m128i loadu_32(void const *__a)
{
    return _mm_loadu_si32(__a);
}
#endif
#define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
#define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))

View file

@ -46,6 +46,7 @@
#include "ih264_defs.h"
#include "ih264_size_defs.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
#include "ih264_trans_macros.h"
#include "ih264_trans_data.h"
#include "ih264_structs.h"
@ -136,10 +137,10 @@ void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred,
src_r2 = _mm_cvtepu8_epi16(src_r2);
src_r3 = _mm_cvtepu8_epi16(src_r3);
pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits
pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits
pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits
pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits
pred_r0 = _mm_cvtepu8_epi16(pred_r0); //p00 p01 p02 p03 -- all 16 bits
pred_r1 = _mm_cvtepu8_epi16(pred_r1); //p10 p11 p12 p13 -- all 16 bits