From da835cc8a38c54c09b4fe82b73a32781f57c2d4c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 2 Mar 2009 08:32:29 +0000
Subject: [PATCH 001/315] Create 0.5 release branch.

Originally committed as revision 17727 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libswscale/Makefile                   |   24 -
 libswscale/cs_test.c                  |  175 --
 libswscale/internal_bfin.S            |  606 -----
 libswscale/rgb2rgb.c                  |  442 ----
 libswscale/rgb2rgb.h                  |  147 --
 libswscale/rgb2rgb_template.c         | 2738 ---------------------
 libswscale/swscale-example.c          |  210 --
 libswscale/swscale.c                  | 3198 -------------------------
 libswscale/swscale.h                  |  247 --
 libswscale/swscale_altivec_template.c |  538 -----
 libswscale/swscale_avoption.c         |   60 -
 libswscale/swscale_bfin.c             |   91 -
 libswscale/swscale_internal.h         |  324 ---
 libswscale/swscale_template.c         | 3041 -----------------------
 libswscale/yuv2rgb.c                  |  684 ------
 libswscale/yuv2rgb_altivec.c          |  962 --------
 libswscale/yuv2rgb_bfin.c             |  203 --
 libswscale/yuv2rgb_mlib.c             |   85 -
 libswscale/yuv2rgb_template.c         |  453 ----
 libswscale/yuv2rgb_vis.c              |  209 --
 20 files changed, 14437 deletions(-)
 delete mode 100644 libswscale/Makefile
 delete mode 100644 libswscale/cs_test.c
 delete mode 100644 libswscale/internal_bfin.S
 delete mode 100644 libswscale/rgb2rgb.c
 delete mode 100644 libswscale/rgb2rgb.h
 delete mode 100644 libswscale/rgb2rgb_template.c
 delete mode 100644 libswscale/swscale-example.c
 delete mode 100644 libswscale/swscale.c
 delete mode 100644 libswscale/swscale.h
 delete mode 100644 libswscale/swscale_altivec_template.c
 delete mode 100644 libswscale/swscale_avoption.c
 delete mode 100644 libswscale/swscale_bfin.c
 delete mode 100644 libswscale/swscale_internal.h
 delete mode 100644 libswscale/swscale_template.c
 delete mode 100644 libswscale/yuv2rgb.c
 delete mode 100644 libswscale/yuv2rgb_altivec.c
 delete mode 100644 libswscale/yuv2rgb_bfin.c
 delete mode 100644 libswscale/yuv2rgb_mlib.c
 delete mode 100644 libswscale/yuv2rgb_template.c
 delete mode 100644 libswscale/yuv2rgb_vis.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
deleted file mode 100644
index 6d500abc65..0000000000
--- a/libswscale/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-include $(SUBDIR)../config.mak
-
-NAME = swscale
-FFLIBS = avutil
-
-HEADERS = swscale.h
-
-OBJS = rgb2rgb.o swscale.o swscale_avoption.o yuv2rgb.o
-
-OBJS-$(ARCH_BFIN)          +=  internal_bfin.o swscale_bfin.o yuv2rgb_bfin.o
-OBJS-$(CONFIG_MLIB)        +=  yuv2rgb_mlib.o
-OBJS-$(HAVE_ALTIVEC)       +=  yuv2rgb_altivec.o
-OBJS-$(HAVE_VIS)           +=  yuv2rgb_vis.o
-
-TESTS = cs_test swscale-example
-
-CLEANFILES = cs_test swscale-example
-
-include $(SUBDIR)../subdir.mak
-
-$(SUBDIR)cs_test: $(SUBDIR)cs_test.o $(SUBDIR)$(LIBNAME)
-
-$(SUBDIR)swscale-example: $(SUBDIR)swscale-example.o $(SUBDIR)$(LIBNAME)
-$(SUBDIR)swscale-example: EXTRALIBS += -lm
diff --git a/libswscale/cs_test.c b/libswscale/cs_test.c
deleted file mode 100644
index 2223ee3a31..0000000000
--- a/libswscale/cs_test.c
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <string.h>              /* for memset() */
-#include <unistd.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "swscale.h"
-#include "rgb2rgb.h"
-
-#define SIZE 1000
-#define srcByte 0x55
-#define dstByte 0xBB
-
-#define FUNC(s,d,n) {s,d,#n,n}
-
-static int cpu_caps;
-
-static char *args_parse(int argc, char *argv[])
-{
-    int o;
-
-    while ((o = getopt(argc, argv, "m23")) != -1) {
-        switch (o) {
-            case 'm':
-                cpu_caps |= SWS_CPU_CAPS_MMX;
-                break;
-            case '2':
-                cpu_caps |= SWS_CPU_CAPS_MMX2;
-                break;
-            case '3':
-                cpu_caps |= SWS_CPU_CAPS_3DNOW;
-                break;
-            default:
-                av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
-        }
-    }
-
-    return argv[optind];
-}
-
-int main(int argc, char **argv)
-{
-    int i, funcNum;
-    uint8_t *srcBuffer= (uint8_t*)av_malloc(SIZE);
-    uint8_t *dstBuffer= (uint8_t*)av_malloc(SIZE);
-    int failedNum=0;
-    int passedNum=0;
-
-    av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
-    args_parse(argc, argv);
-    av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
-    sws_rgb2rgb_init(cpu_caps);
-
-    for(funcNum=0; ; funcNum++){
-        struct func_info_s {
-            int src_bpp;
-            int dst_bpp;
-            const char *name;
-            void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
-        } func_info[] = {
-            FUNC(2, 2, rgb15to16),
-            FUNC(2, 3, rgb15to24),
-            FUNC(2, 4, rgb15to32),
-            FUNC(2, 3, rgb16to24),
-            FUNC(2, 4, rgb16to32),
-            FUNC(3, 2, rgb24to15),
-            FUNC(3, 2, rgb24to16),
-            FUNC(3, 4, rgb24to32),
-            FUNC(4, 2, rgb32to15),
-            FUNC(4, 2, rgb32to16),
-            FUNC(4, 3, rgb32to24),
-            FUNC(2, 2, rgb16to15),
-            FUNC(2, 2, rgb15tobgr15),
-            FUNC(2, 2, rgb15tobgr16),
-            FUNC(2, 3, rgb15tobgr24),
-            FUNC(2, 4, rgb15tobgr32),
-            FUNC(2, 2, rgb16tobgr15),
-            FUNC(2, 2, rgb16tobgr16),
-            FUNC(2, 3, rgb16tobgr24),
-            FUNC(2, 4, rgb16tobgr32),
-            FUNC(3, 2, rgb24tobgr15),
-            FUNC(3, 2, rgb24tobgr16),
-            FUNC(3, 3, rgb24tobgr24),
-            FUNC(3, 4, rgb24tobgr32),
-            FUNC(4, 2, rgb32tobgr15),
-            FUNC(4, 2, rgb32tobgr16),
-            FUNC(4, 3, rgb32tobgr24),
-            FUNC(4, 4, rgb32tobgr32),
-            FUNC(0, 0, NULL)
-        };
-        int width;
-        int failed=0;
-        int srcBpp=0;
-        int dstBpp=0;
-
-        if (!func_info[funcNum].func) break;
-
-        av_log(NULL, AV_LOG_INFO,".");
-        memset(srcBuffer, srcByte, SIZE);
-
-        for(width=63; width>0; width--){
-            int dstOffset;
-            for(dstOffset=128; dstOffset<196; dstOffset+=4){
-                int srcOffset;
-                memset(dstBuffer, dstByte, SIZE);
-
-                for(srcOffset=128; srcOffset<196; srcOffset+=4){
-                    uint8_t *src= srcBuffer+srcOffset;
-                    uint8_t *dst= dstBuffer+dstOffset;
-                    const char *name=NULL;
-
-                    if(failed) break; //don't fill the screen with shit ...
-
-                    srcBpp = func_info[funcNum].src_bpp;
-                    dstBpp = func_info[funcNum].dst_bpp;
-                    name   = func_info[funcNum].name;
-
-                    func_info[funcNum].func(src, dst, width*srcBpp);
-
-                    if(!srcBpp) break;
-
-                    for(i=0; i<SIZE; i++){
-                        if(srcBuffer[i]!=srcByte){
-                            av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
-                                   i, width, srcOffset, dstOffset, name);
-                            failed=1;
-                            break;
-                        }
-                    }
-                    for(i=0; i<dstOffset; i++){
-                        if(dstBuffer[i]!=dstByte){
-                            av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
-                                   i, width, srcOffset, dstOffset, name);
-                            failed=1;
-                            break;
-                        }
-                    }
-                    for(i=dstOffset + width*dstBpp; i<SIZE; i++){
-                        if(dstBuffer[i]!=dstByte){
-                            av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
-                                   i, width, srcOffset, dstOffset, name);
-                            failed=1;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-        if(failed) failedNum++;
-        else if(srcBpp) passedNum++;
-    }
-
-    av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum);
-    return failedNum;
-}
diff --git a/libswscale/internal_bfin.S b/libswscale/internal_bfin.S
deleted file mode 100644
index fb7bda7e12..0000000000
--- a/libswscale/internal_bfin.S
+++ /dev/null
@@ -1,606 +0,0 @@
-/*
- * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
- *                    April 20, 2007
- *
- * Blackfin video color space converter operations
- * convert I420 YV12 to RGB in various formats
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-/*
-YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
-and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts.
-
-
-The following calculation is used for the conversion:
-
-  r = clipz((y-oy)*cy  + crv*(v-128))
-  g = clipz((y-oy)*cy  + cgv*(v-128) + cgu*(u-128))
-  b = clipz((y-oy)*cy  + cbu*(u-128))
-
-y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
-
-
-New factorization to eliminate the truncation error which was
-occurring due to the byteop3p.
-
-
-1) Use the bytop16m to subtract quad bytes we use this in U8 this
- then so the offsets need to be renormalized to 8bits.
-
-2) Scale operands up by a factor of 4 not 8 because Blackfin
-   multiplies include a shift.
-
-3) Compute into the accumulators cy*yx0, cy*yx1.
-
-4) Compute each of the linear equations:
-     r = clipz((y - oy) * cy  + crv * (v - 128))
-
-     g = clipz((y - oy) * cy  + cgv * (v - 128) + cgu * (u - 128))
-
-     b = clipz((y - oy) * cy  + cbu * (u - 128))
-
-   Reuse of the accumulators requires that we actually multiply
-   twice once with addition and the second time with a subtraction.
-
-   Because of this we need to compute the equations in the order R B
-   then G saving the writes for B in the case of 24/32 bit color
-   formats.
-
-   API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
-                      int dW, uint32_t *coeffs);
-
-       A          B
-       ---        ---
-       i2 = cb    i3 = cr
-       i1 = coeff i0 = y
-
-Where coeffs have the following layout in memory.
-
-uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv;
-
-coeffs is a pointer to oy.
-
-The {rgb} masks are only utilized by the 565 packing algorithm. Note the data
-replication is used to simplify the internal algorithms for the dual Mac
-architecture of BlackFin.
-
-All routines are exported with _ff_bfin_ as a symbol prefix.
-
-Rough performance gain compared against -O3:
-
-2779809/1484290 187.28%
-
-which translates to ~33c/pel to ~57c/pel for the reference vs 17.5
-c/pel for the optimized implementations. Not sure why there is such a
-huge variation on the reference codes on Blackfin I guess it must have
-to do with the memory system.
-*/
-
-#define mL3 .text
-#ifdef __FDPIC__
-#define mL1 .l1.text
-#else
-#define mL1 mL3
-#endif
-#define MEM mL1
-
-#define DEFUN(fname,where,interface) \
-        .section where;              \
-        .global _ff_bfin_ ## fname;  \
-        .type _ff_bfin_ ## fname, STT_FUNC; \
-        .align 8;                    \
-        _ff_bfin_ ## fname
-
-#define DEFUN_END(fname) \
-        .size _ff_bfin_ ## fname, . - _ff_bfin_ ## fname
-
-
-.text
-
-#define COEFF_LEN        11*4
-#define COEFF_REL_CY_OFF 4*4
-
-#define ARG_OUT   20
-#define ARG_W     24
-#define ARG_COEFF 28
-
-DEFUN(yuv2rgb565_line,MEM,
-   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
-        link 0;
-        [--sp] = (r7:4);
-        p1 = [fp+ARG_OUT];
-        r3 = [fp+ARG_W];
-
-        i0 = r0;
-        i2 = r1;
-        i3 = r2;
-
-        r0 = [fp+ARG_COEFF];
-        i1 = r0;
-        b1 = i1;
-        l1 = COEFF_LEN;
-        m0 = COEFF_REL_CY_OFF;
-        p0 = r3;
-
-        r0   = [i0++];         // 2Y
-        r1.l = w[i2++];        // 2u
-        r1.h = w[i3++];        // 2v
-        p0 = p0>>2;
-
-        lsetup (.L0565, .L1565) lc0 = p0;
-
-        /*
-           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv
-           r0 -- used to load 4ys
-           r1 -- used to load 2us,2vs
-           r4 -- y3,y2
-           r5 -- y1,y0
-           r6 -- u1,u0
-           r7 -- v1,v0
-        */
-                                                              r2=[i1++]; // oy
-.L0565:
-        /*
-        rrrrrrrr gggggggg bbbbbbbb
-         5432109876543210
-                    bbbbb >>3
-              gggggggg    <<3
-         rrrrrrrr         <<8
-         rrrrrggggggbbbbb
-        */
-        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
-        (r7,r6) = byteop16m (r1:0, r3:2) (r);
-        r5 = r5 << 2 (v);                                                // y1,y0
-        r4 = r4 << 2 (v);                                                // y3,y2
-        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
-        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
-        /* Y' = y*cy */
-        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
-                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 8 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask
-        r2 = r2 << 3 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1=[i1++]; // cy
-
-        /* Y' = y*cy */
-
-        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
-                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 8 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0   =  [i0++];        // 2Y
-        r2 = r2 << 3 (v)                                   || r1.l = w[i2++];        // 2u
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1.h = w[i3++];        // 2v
-.L1565:                                                       r2=[i1++]; // oy
-
-        l1 = 0;
-
-        (r7:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuv2rgb565_line)
-
-DEFUN(yuv2rgb555_line,MEM,
-   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
-        link 0;
-        [--sp] = (r7:4);
-        p1 = [fp+ARG_OUT];
-        r3 = [fp+ARG_W];
-
-        i0 = r0;
-        i2 = r1;
-        i3 = r2;
-
-        r0 = [fp+ARG_COEFF];
-        i1 = r0;
-        b1 = i1;
-        l1 = COEFF_LEN;
-        m0 = COEFF_REL_CY_OFF;
-        p0 = r3;
-
-        r0   = [i0++];         // 2Y
-        r1.l = w[i2++];        // 2u
-        r1.h = w[i3++];        // 2v
-        p0 = p0>>2;
-
-        lsetup (.L0555, .L1555) lc0 = p0;
-
-        /*
-           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv
-           r0 -- used to load 4ys
-           r1 -- used to load 2us,2vs
-           r4 -- y3,y2
-           r5 -- y1,y0
-           r6 -- u1,u0
-           r7 -- v1,v0
-        */
-                                                              r2=[i1++]; // oy
-.L0555:
-        /*
-        rrrrrrrr gggggggg bbbbbbbb
-         5432109876543210
-                    bbbbb >>3
-               gggggggg   <<2
-          rrrrrrrr        <<7
-         xrrrrrgggggbbbbb
-        */
-
-        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
-        (r7,r6) = byteop16m (r1:0, r3:2) (r);
-        r5 = r5 << 2 (v);                                                // y1,y0
-        r4 = r4 << 2 (v);                                                // y3,y2
-        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
-        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
-        /* Y' = y*cy */
-        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
-                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 7 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask
-        r2 = r2 << 2 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1=[i1++]; // cy
-
-        /* Y' = y*cy */
-
-        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2 = r2 >> 3 (v);
-        r3 = r2 & r5;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
-                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-        r2 = r2 << 7 (v);
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0=[i0++];     // 4Y
-        r2 = r2 << 2 (v)                                   || r1.l=w[i2++];  // 2u
-        r2 = r2 & r5;
-        r3 = r3 | r2;
-        [p1++]=r3                                          || r1.h=w[i3++]; // 2v
-
-.L1555:                                                       r2=[i1++]; // oy
-
-        l1 = 0;
-
-        (r7:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuv2rgb555_line)
-
-DEFUN(yuv2rgb24_line,MEM,
-   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
-        link 0;
-        [--sp] = (r7:4);
-        p1 = [fp+ARG_OUT];
-        r3 = [fp+ARG_W];
-        p2 = p1;
-        p2 += 3;
-
-        i0 = r0;
-        i2 = r1;
-        i3 = r2;
-
-        r0 = [fp+ARG_COEFF]; // coeff buffer
-        i1 = r0;
-        b1 = i1;
-        l1 = COEFF_LEN;
-        m0 = COEFF_REL_CY_OFF;
-        p0 = r3;
-
-        r0   = [i0++];         // 2Y
-        r1.l = w[i2++];        // 2u
-        r1.h = w[i3++];        // 2v
-        p0 = p0>>2;
-
-        lsetup (.L0888, .L1888) lc0 = p0;
-
-        /*
-           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv
-           r0 -- used to load 4ys
-           r1 -- used to load 2us,2vs
-           r4 -- y3,y2
-           r5 -- y1,y0
-           r6 -- u1,u0
-           r7 -- v1,v0
-        */
-                                                              r2=[i1++]; // oy
-.L0888:
-        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
-        (r7,r6) = byteop16m (r1:0, r3:2) (r);
-        r5 = r5 << 2 (v);               // y1,y0
-        r4 = r4 << 2 (v);               // y3,y2
-        r6 = r6 << 2 (v) || r0=[i1++];  // u1,u0, r0=zero
-        r7 = r7 << 2 (v) || r1=[i1++];  // v1,v0  r1=cy
-
-        /* Y' = y*cy */
-        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2=r2>>16 || B[p1++]=r2;
-                     B[p2++]=r2;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
-                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
-        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask, oy,cy,zero
-
-        r2=r2>>16 || B[p1++]=r2;
-                     B[p2++]=r2;
-
-        r3=r3>>16 || B[p1++]=r3;
-                     B[p2++]=r3                            || r1=[i1++]; // cy
-
-        p1+=3;
-        p2+=3;
-        /* Y' = y*cy */
-        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
-
-        /* R = Y+ crv*(Cr-128) */
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
-        r2=r2>>16 || B[p1++]=r2;
-        B[p2++]=r2;
-
-        /* B = Y+ cbu*(Cb-128) */
-        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
-                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
-        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
-
-        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
-                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
-        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
-        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++]; // gmask
-        r2=r2>>16 || B[p1++]=r2 || r0 = [i0++];    // 4y
-                     B[p2++]=r2 || r1.l = w[i2++]; // 2u
-        r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v
-                     B[p2++]=r3 || r2=[i1++];      // oy
-
-        p1+=3;
-.L1888: p2+=3;
-
-        l1 = 0;
-
-        (r7:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuv2rgb24_line)
-
-
-
-#define ARG_vdst        20
-#define ARG_width       24
-#define ARG_height      28
-#define ARG_lumStride   32
-#define ARG_chromStride 36
-#define ARG_srcStride   40
-
-DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
-        link 0;
-        [--sp] = (r7:4,p5:4);
-
-        p0 = r1;       // Y top even
-
-        i2 = r2; // *u
-        r2 = [fp + ARG_vdst];
-        i3 = r2; // *v
-
-        r1 = [fp + ARG_srcStride];
-        r2 = r0 + r1;
-        r1 += -8;  // i0,i1 is pre read need to correct
-        m0 = r1;
-
-        i0 = r0;  // uyvy_T even
-        i1 = r2;  // uyvy_B odd
-
-        p2 = [fp + ARG_lumStride];
-        p1 = p0 + p2;  // Y bot odd
-
-        p5 = [fp + ARG_width];
-        p4 = [fp + ARG_height];
-        r0 = p5;
-        p4 = p4 >> 1;
-        p5 = p5 >> 2;
-
-        r2 = [fp + ARG_chromStride];
-        r0 = r0 >> 1;
-        r2 = r2 - r0;
-        m1 = r2;
-
-        /*   I0,I1 - src input line pointers
-         *   p0,p1 - luma output line pointers
-         *   I2    - dstU
-         *   I3    - dstV
-         */
-
-        lsetup (0f, 1f) lc1 = p4;   // H/2
-0:        r0 = [i0++] || r2 = [i1++];
-          r1 = [i0++] || r3 = [i1++];
-          r4 = byteop1p(r1:0, r3:2);
-          r5 = byteop1p(r1:0, r3:2) (r);
-          lsetup (2f, 3f) lc0 = p5; // W/4
-2:          r0 = r0 >> 8(v);
-            r1 = r1 >> 8(v);
-            r2 = r2 >> 8(v);
-            r3 = r3 >> 8(v);
-            r0 = bytepack(r0, r1);
-            r2 = bytepack(r2, r3)         ||  [p0++] = r0;    // yyyy
-            r6 = pack(r5.l, r4.l)         ||  [p1++] = r2;    // yyyy
-            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
-            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
-            r4 = byteop1p(r1:0, r3:2)     ||  w[i2++] = r6.l; // uu
-3:          r5 = byteop1p(r1:0, r3:2) (r) ||  w[i3++] = r6.h; // vv
-
-          i0 += m0;
-          i1 += m0;
-          i2 += m1;
-          i3 += m1;
-          p0 = p0 + p2;
-1:        p1 = p1 + p2;
-
-        (r7:4,p5:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(uyvytoyv12)
-
-DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
-        link 0;
-        [--sp] = (r7:4,p5:4);
-
-        p0 = r1;       // Y top even
-
-        i2 = r2; // *u
-        r2 = [fp + ARG_vdst];
-        i3 = r2; // *v
-
-        r1 = [fp + ARG_srcStride];
-        r2 = r0 + r1;
-        r1 += -8;  // i0,i1 is pre read need to correct
-        m0 = r1;
-
-        i0 = r0;  // uyvy_T even
-        i1 = r2;  // uyvy_B odd
-
-        p2 = [fp + ARG_lumStride];
-        p1 = p0 + p2;  // Y bot odd
-
-        p5 = [fp + ARG_width];
-        p4 = [fp + ARG_height];
-        r0 = p5;
-        p4 = p4 >> 1;
-        p5 = p5 >> 2;
-
-        r2 = [fp + ARG_chromStride];
-        r0 = r0 >> 1;
-        r2 = r2 - r0;
-        m1 = r2;
-
-        /*   I0,I1 - src input line pointers
-         *   p0,p1 - luma output line pointers
-         *   I2    - dstU
-         *   I3    - dstV
-         */
-
-        lsetup (0f, 1f) lc1 = p4;   // H/2
-0:        r0 = [i0++] || r2 = [i1++];
-          r1 = [i0++] || r3 = [i1++];
-          r4 = bytepack(r0, r1);
-          r5 = bytepack(r2, r3);
-          lsetup (2f, 3f) lc0 = p5; // W/4
-2:          r0 = r0 >> 8(v) || [p0++] = r4;  // yyyy-even
-            r1 = r1 >> 8(v) || [p1++] = r5;  // yyyy-odd
-            r2 = r2 >> 8(v);
-            r3 = r3 >> 8(v);
-            r4 = byteop1p(r1:0, r3:2);
-            r5 = byteop1p(r1:0, r3:2) (r);
-            r6 = pack(r5.l, r4.l);
-            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
-            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
-            r4 = bytepack(r0, r1)         ||  w[i2++] = r6.l; // uu
-3:          r5 = bytepack(r2, r3)         ||  w[i3++] = r6.h; // vv
-
-          i0 += m0;
-          i1 += m0;
-          i2 += m1;
-          i3 += m1;
-          p0 = p0 + p2;
-1:        p1 = p1 + p2;
-
-        (r7:4,p5:4) = [sp++];
-        unlink;
-        rts;
-DEFUN_END(yuyvtoyv12)
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
deleted file mode 100644
index ad69265c37..0000000000
--- a/libswscale/rgb2rgb.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * software RGB to RGB converter
- * pluralize by software PAL8 to RGB converter
- *              software YUV to YUV converter
- *              software YUV to RGB converter
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * The C code (not assembly, MMX, ...) of this file can be used
- * under the LGPL license.
- */
-#include <inttypes.h>
-#include "config.h"
-#include "libavutil/x86_cpu.h"
-#include "libavutil/bswap.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
-
-void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-
-void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
-void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
-void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
-void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
-void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                   long width, long height,
-                   long lumStride, long chromStride, long srcStride);
-void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                    long width, long height,
-                    long lumStride, long chromStride, long srcStride);
-void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                 long srcStride, long dstStride);
-void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst,
-                        long width, long height, long src1Stride,
-                        long src2Stride, long dstStride);
-void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                    uint8_t *dst1, uint8_t *dst2,
-                    long width, long height,
-                    long srcStride1, long srcStride2,
-                    long dstStride1, long dstStride2);
-void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                     uint8_t *dst,
-                     long width, long height,
-                     long srcStride1, long srcStride2,
-                     long srcStride3, long dstStride);
-
-#if ARCH_X86 && CONFIG_GPL
-DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32g)      = 0x0000FF000000FF00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32r)      = 0x00FF000000FF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32a)      = 0xFF000000FF000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask32)       = 0x00FFFFFF00FFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3216br)   = 0x00F800F800F800F8ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3216g)    = 0x0000FC000000FC00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask3215g)    = 0x0000F8000000F800ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul3216)      = 0x2000000420000004ULL;
-DECLARE_ASM_CONST(8, uint64_t, mul3215)      = 0x2000000820000008ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24b)      = 0x00FF0000FF0000FFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24g)      = 0xFF0000FF0000FF00ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24r)      = 0x0000FF0000FF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24l)      = 0x0000000000FFFFFFULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24h)      = 0x0000FFFFFF000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hh)     = 0xffff000000000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hhh)    = 0xffffffff00000000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask24hhhh)   = 0xffffffffffff0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
-DECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
-DECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15g)      = 0x03E003E003E003E0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask15r)      = 0x7C007C007C007C00ULL;
-#define mask16b mask15b
-DECLARE_ASM_CONST(8, uint64_t, mask16g)      = 0x07E007E007E007E0ULL;
-DECLARE_ASM_CONST(8, uint64_t, mask16r)      = 0xF800F800F800F800ULL;
-DECLARE_ASM_CONST(8, uint64_t, red_16mask)   = 0x0000f8000000f800ULL;
-DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, blue_16mask)  = 0x0000001f0000001fULL;
-DECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
-DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
-DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
-#endif /* ARCH_X86 */
-
-#define RGB2YUV_SHIFT 8
-#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
-#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
-#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
-#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
-#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
-#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
-#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
-
-//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
-//plain C versions
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef HAVE_SSE2
-#define HAVE_MMX 0
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
-#define HAVE_SSE2 0
-#define RENAME(a) a ## _C
-#include "rgb2rgb_template.c"
-
-#if ARCH_X86 && CONFIG_GPL
-
-//MMX versions
-#undef RENAME
-#undef HAVE_MMX
-#define HAVE_MMX 1
-#define RENAME(a) a ## _MMX
-#include "rgb2rgb_template.c"
-
-//MMX2 versions
-#undef RENAME
-#undef HAVE_MMX2
-#define HAVE_MMX2 1
-#define RENAME(a) a ## _MMX2
-#include "rgb2rgb_template.c"
-
-//3DNOW versions
-#undef RENAME
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNOW
-#include "rgb2rgb_template.c"
-
-#endif //ARCH_X86 || ARCH_X86_64
-
-/*
- RGB15->RGB16 original by Strepto/Astral
- ported to gcc & bugfixed : A'rpi
- MMX2, 3DNOW optimization by Nick Kurshev
- 32-bit C version, and and&add trick by Michael Niedermayer
-*/
-
-void sws_rgb2rgb_init(int flags){
-#if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX)  && CONFIG_GPL
-    if (flags & SWS_CPU_CAPS_MMX2)
-        rgb2rgb_init_MMX2();
-    else if (flags & SWS_CPU_CAPS_3DNOW)
-        rgb2rgb_init_3DNOW();
-    else if (flags & SWS_CPU_CAPS_MMX)
-        rgb2rgb_init_MMX();
-    else
-#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
-        rgb2rgb_init_C();
-}
-
-/**
- * Convert the palette to the same packet 32-bit format as the palette
- */
-void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
-{
-    long i;
-
-    for (i=0; i<num_pixels; i++)
-        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
-}
-
-/**
- * Palette format: ABCD -> dst format: ABC
- */
-void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
-{
-    long i;
-
-    for (i=0; i<num_pixels; i++)
-    {
-        //FIXME slow?
-        dst[0]= palette[src[i]*4+0];
-        dst[1]= palette[src[i]*4+1];
-        dst[2]= palette[src[i]*4+2];
-        dst+= 3;
-    }
-}
-
-/**
- * Palette is assumed to contain BGR16, see rgb32to16 to convert the palette.
- */
-void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
-{
-    long i;
-    for (i=0; i<num_pixels; i++)
-        ((uint16_t *)dst)[i] = ((const uint16_t *)palette)[src[i]];
-}
-void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
-{
-    long i;
-    for (i=0; i<num_pixels; i++)
-        ((uint16_t *)dst)[i] = bswap_16(((const uint16_t *)palette)[src[i]]);
-}
-
-/**
- * Palette is assumed to contain BGR15, see rgb32to15 to convert the palette.
- */
-void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
-{
-    long i;
-    for (i=0; i<num_pixels; i++)
-        ((uint16_t *)dst)[i] = ((const uint16_t *)palette)[src[i]];
-}
-void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
-{
-    long i;
-    for (i=0; i<num_pixels; i++)
-        ((uint16_t *)dst)[i] = bswap_16(((const uint16_t *)palette)[src[i]]);
-}
-
-void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    long num_pixels = src_size >> 2;
-    for (i=0; i<num_pixels; i++)
-    {
-        #ifdef WORDS_BIGENDIAN
-            /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
-            dst[3*i + 0] = src[4*i + 1];
-            dst[3*i + 1] = src[4*i + 2];
-            dst[3*i + 2] = src[4*i + 3];
-        #else
-            dst[3*i + 0] = src[4*i + 2];
-            dst[3*i + 1] = src[4*i + 1];
-            dst[3*i + 2] = src[4*i + 0];
-        #endif
-    }
-}
-
-void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    for (i=0; 3*i<src_size; i++)
-    {
-        #ifdef WORDS_BIGENDIAN
-            /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
-            dst[4*i + 0] = 255;
-            dst[4*i + 1] = src[3*i + 0];
-            dst[4*i + 2] = src[3*i + 1];
-            dst[4*i + 3] = src[3*i + 2];
-        #else
-            dst[4*i + 0] = src[3*i + 2];
-            dst[4*i + 1] = src[3*i + 1];
-            dst[4*i + 2] = src[3*i + 0];
-            dst[4*i + 3] = 255;
-        #endif
-    }
-}
-
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-        #ifdef WORDS_BIGENDIAN
-            *d++ = 255;
-            *d++ = (bgr&0x1F)<<3;
-            *d++ = (bgr&0x7E0)>>3;
-            *d++ = (bgr&0xF800)>>8;
-        #else
-            *d++ = (bgr&0xF800)>>8;
-            *d++ = (bgr&0x7E0)>>3;
-            *d++ = (bgr&0x1F)<<3;
-            *d++ = 255;
-        #endif
-    }
-}
-
-void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = (bgr&0xF800)>>8;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0x1F)<<3;
-    }
-}
-
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    long num_pixels = src_size >> 1;
-
-    for (i=0; i<num_pixels; i++)
-    {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        ((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11);
-    }
-}
-
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    long num_pixels = src_size >> 1;
-
-    for (i=0; i<num_pixels; i++)
-    {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        ((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10);
-    }
-}
-
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-        #ifdef WORDS_BIGENDIAN
-            *d++ = 255;
-            *d++ = (bgr&0x1F)<<3;
-            *d++ = (bgr&0x3E0)>>2;
-            *d++ = (bgr&0x7C00)>>7;
-        #else
-            *d++ = (bgr&0x7C00)>>7;
-            *d++ = (bgr&0x3E0)>>2;
-            *d++ = (bgr&0x1F)<<3;
-            *d++ = 255;
-        #endif
-    }
-}
-
-void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = (bgr&0x7C00)>>7;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x1F)<<3;
-    }
-}
-
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    long num_pixels = src_size >> 1;
-
-    for (i=0; i<num_pixels; i++)
-    {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        ((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11);
-    }
-}
-
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    long num_pixels = src_size >> 1;
-
-    for (i=0; i<num_pixels; i++)
-    {
-        unsigned br;
-        unsigned rgb = ((const uint16_t*)src)[i];
-        br = rgb&0x7c1F;
-        ((uint16_t*)dst)[i] = (br>>10) | (rgb&0x3E0) | (br<<10);
-    }
-}
-
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i;
-    long num_pixels = src_size;
-    for (i=0; i<num_pixels; i++)
-    {
-        unsigned b,g,r;
-        register uint8_t rgb;
-        rgb = src[i];
-        r = (rgb&0x07);
-        g = (rgb&0x38)>>3;
-        b = (rgb&0xC0)>>6;
-        dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6);
-    }
-}
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
deleted file mode 100644
index df912c8533..0000000000
--- a/libswscale/rgb2rgb.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- *  software RGB to RGB converter
- *  pluralize by Software PAL8 to RGB converter
- *               Software YUV to YUV converter
- *               Software YUV to RGB converter
- *  Written by Nick Kurshev.
- *  palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_RGB2RGB_H
-#define SWSCALE_RGB2RGB_H
-
-#include <inttypes.h>
-
-/* A full collection of RGB to RGB(BGR) converters */
-extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-
-void rgb24to32   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb32to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
-
-
-void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write high quality version.
- */
-//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
-
-/**
- * Width should be a multiple of 16.
- */
-extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                          long width, long height,
-                          long lumStride, long chromStride, long srcStride);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
-
-/**
- * Width should be a multiple of 16.
- */
-extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 2.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write high quality version.
- */
-extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           long width, long height,
-                           long lumStride, long chromStride, long srcStride);
-extern void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                        long srcStride, long dstStride);
-
-extern void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst,
-                               long width, long height, long src1Stride,
-                               long src2Stride, long dstStride);
-
-extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                           uint8_t *dst1, uint8_t *dst2,
-                           long width, long height,
-                           long srcStride1, long srcStride2,
-                           long dstStride1, long dstStride2);
-
-extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                            uint8_t *dst,
-                            long width, long height,
-                            long srcStride1, long srcStride2,
-                            long srcStride3, long dstStride);
-
-void sws_rgb2rgb_init(int flags);
-
-#endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
deleted file mode 100644
index e95b628049..0000000000
--- a/libswscale/rgb2rgb_template.c
+++ /dev/null
@@ -1,2738 +0,0 @@
-/*
- * software RGB to RGB converter
- * pluralize by software PAL8 to RGB converter
- *              software YUV to YUV converter
- *              software YUV to RGB converter
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- * lot of big-endian byte order fixes by Alex Beregszaszi
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * The C code (not assembly, MMX, ...) of this file can be used
- * under the LGPL license.
- */
-
-#include <stddef.h>
-
-#undef PREFETCH
-#undef MOVNTQ
-#undef EMMS
-#undef SFENCE
-#undef MMREG_SIZE
-#undef PREFETCHW
-#undef PAVGB
-
-#if HAVE_SSE2
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 8
-#endif
-
-#if HAVE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#define PREFETCHW "prefetchw"
-#define PAVGB     "pavgusb"
-#elif HAVE_MMX2
-#define PREFETCH "prefetchnta"
-#define PREFETCHW "prefetcht0"
-#define PAVGB     "pavgb"
-#else
-#define PREFETCH  " # nop"
-#define PREFETCHW " # nop"
-#endif
-
-#if HAVE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#if HAVE_MMX2
-#define MOVNTQ "movntq"
-#define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
-
-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    uint8_t *dest = dst;
-    const uint8_t *s = src;
-    const uint8_t *end;
-    #if HAVE_MMX
-        const uint8_t *mm_end;
-    #endif
-    end = s + src_size;
-    #if HAVE_MMX
-        __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-        mm_end = end - 23;
-        __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
-        while (s < mm_end)
-        {
-            __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "punpckldq    3%1, %%mm0    \n\t"
-            "movd         6%1, %%mm1    \n\t"
-            "punpckldq    9%1, %%mm1    \n\t"
-            "movd        12%1, %%mm2    \n\t"
-            "punpckldq   15%1, %%mm2    \n\t"
-            "movd        18%1, %%mm3    \n\t"
-            "punpckldq   21%1, %%mm3    \n\t"
-            "por        %%mm7, %%mm0    \n\t"
-            "por        %%mm7, %%mm1    \n\t"
-            "por        %%mm7, %%mm2    \n\t"
-            "por        %%mm7, %%mm3    \n\t"
-            MOVNTQ"     %%mm0,   %0     \n\t"
-            MOVNTQ"     %%mm1,  8%0     \n\t"
-            MOVNTQ"     %%mm2, 16%0     \n\t"
-            MOVNTQ"     %%mm3, 24%0"
-            :"=m"(*dest)
-            :"m"(*s)
-            :"memory");
-            dest += 32;
-            s += 24;
-        }
-        __asm__ volatile(SFENCE:::"memory");
-        __asm__ volatile(EMMS:::"memory");
-    #endif
-    while (s < end)
-    {
-    #ifdef WORDS_BIGENDIAN
-        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
-        *dest++ = 255;
-        *dest++ = s[2];
-        *dest++ = s[1];
-        *dest++ = s[0];
-        s+=3;
-    #else
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = 255;
-    #endif
-    }
-}
-
-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    uint8_t *dest = dst;
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 31;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movq          %1, %%mm0    \n\t"
-        "movq         8%1, %%mm1    \n\t"
-        "movq        16%1, %%mm4    \n\t"
-        "movq        24%1, %%mm5    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm1, %%mm3    \n\t"
-        "movq       %%mm4, %%mm6    \n\t"
-        "movq       %%mm5, %%mm7    \n\t"
-        "psrlq         $8, %%mm2    \n\t"
-        "psrlq         $8, %%mm3    \n\t"
-        "psrlq         $8, %%mm6    \n\t"
-        "psrlq         $8, %%mm7    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm1    \n\t"
-        "pand          %2, %%mm4    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "pand          %3, %%mm2    \n\t"
-        "pand          %3, %%mm3    \n\t"
-        "pand          %3, %%mm6    \n\t"
-        "pand          %3, %%mm7    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm3, %%mm1    \n\t"
-        "por        %%mm6, %%mm4    \n\t"
-        "por        %%mm7, %%mm5    \n\t"
-
-        "movq       %%mm1, %%mm2    \n\t"
-        "movq       %%mm4, %%mm3    \n\t"
-        "psllq        $48, %%mm2    \n\t"
-        "psllq        $32, %%mm3    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "pand          %5, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psrlq        $16, %%mm1    \n\t"
-        "psrlq        $32, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm3, %%mm1    \n\t"
-        "pand          %6, %%mm5    \n\t"
-        "por        %%mm5, %%mm4    \n\t"
-
-        MOVNTQ"     %%mm0,   %0     \n\t"
-        MOVNTQ"     %%mm1,  8%0     \n\t"
-        MOVNTQ"     %%mm4, 16%0"
-        :"=m"(*dest)
-        :"m"(*s),"m"(mask24l),
-         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
-        :"memory");
-        dest += 24;
-        s += 32;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-#ifdef WORDS_BIGENDIAN
-        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
-        s++;
-        dest[2] = *s++;
-        dest[1] = *s++;
-        dest[0] = *s++;
-        dest += 3;
-#else
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = *s++;
-        s++;
-#endif
-    }
-}
-
-/*
- original by Strepto/Astral
- ported to gcc & bugfixed: A'rpi
- MMX2, 3DNOW optimization by Nick Kurshev
- 32-bit C version, and and&add trick by Michael Niedermayer
-*/
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    register const uint8_t* s=src;
-    register uint8_t* d=dst;
-    register const uint8_t *end;
-    const uint8_t *mm_end;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
-    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
-    mm_end = end - 15;
-    while (s<mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"  32%1         \n\t"
-        "movq        %1, %%mm0  \n\t"
-        "movq       8%1, %%mm2  \n\t"
-        "movq     %%mm0, %%mm1  \n\t"
-        "movq     %%mm2, %%mm3  \n\t"
-        "pand     %%mm4, %%mm0  \n\t"
-        "pand     %%mm4, %%mm2  \n\t"
-        "paddw    %%mm1, %%mm0  \n\t"
-        "paddw    %%mm3, %%mm2  \n\t"
-        MOVNTQ"   %%mm0,  %0    \n\t"
-        MOVNTQ"   %%mm2, 8%0"
-        :"=m"(*d)
-        :"m"(*s)
-        );
-        d+=16;
-        s+=16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    mm_end = end - 3;
-    while (s < mm_end)
-    {
-        register unsigned x= *((const uint32_t *)s);
-        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
-        d+=4;
-        s+=4;
-    }
-    if (s < end)
-    {
-        register unsigned short x= *((const uint16_t *)s);
-        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
-    }
-}
-
-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    register const uint8_t* s=src;
-    register uint8_t* d=dst;
-    register const uint8_t *end;
-    const uint8_t *mm_end;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
-    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
-    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
-    mm_end = end - 15;
-    while (s<mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"  32%1         \n\t"
-        "movq        %1, %%mm0  \n\t"
-        "movq       8%1, %%mm2  \n\t"
-        "movq     %%mm0, %%mm1  \n\t"
-        "movq     %%mm2, %%mm3  \n\t"
-        "psrlq       $1, %%mm0  \n\t"
-        "psrlq       $1, %%mm2  \n\t"
-        "pand     %%mm7, %%mm0  \n\t"
-        "pand     %%mm7, %%mm2  \n\t"
-        "pand     %%mm6, %%mm1  \n\t"
-        "pand     %%mm6, %%mm3  \n\t"
-        "por      %%mm1, %%mm0  \n\t"
-        "por      %%mm3, %%mm2  \n\t"
-        MOVNTQ"   %%mm0,  %0    \n\t"
-        MOVNTQ"   %%mm2, 8%0"
-        :"=m"(*d)
-        :"m"(*s)
-        );
-        d+=16;
-        s+=16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    mm_end = end - 3;
-    while (s < mm_end)
-    {
-        register uint32_t x= *((const uint32_t*)s);
-        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
-        s+=4;
-        d+=4;
-    }
-    if (s < end)
-    {
-        register uint16_t x= *((const uint16_t*)s);
-        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
-        s+=2;
-        d+=2;
-    }
-}
-
-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
-    __asm__ volatile(
-    "movq           %3, %%mm5   \n\t"
-    "movq           %4, %%mm6   \n\t"
-    "movq           %5, %%mm7   \n\t"
-    "jmp 2f                     \n\t"
-    ASMALIGN(4)
-    "1:                         \n\t"
-    PREFETCH"   32(%1)          \n\t"
-    "movd         (%1), %%mm0   \n\t"
-    "movd        4(%1), %%mm3   \n\t"
-    "punpckldq   8(%1), %%mm0   \n\t"
-    "punpckldq  12(%1), %%mm3   \n\t"
-    "movq        %%mm0, %%mm1   \n\t"
-    "movq        %%mm3, %%mm4   \n\t"
-    "pand        %%mm6, %%mm0   \n\t"
-    "pand        %%mm6, %%mm3   \n\t"
-    "pmaddwd     %%mm7, %%mm0   \n\t"
-    "pmaddwd     %%mm7, %%mm3   \n\t"
-    "pand        %%mm5, %%mm1   \n\t"
-    "pand        %%mm5, %%mm4   \n\t"
-    "por         %%mm1, %%mm0   \n\t"
-    "por         %%mm4, %%mm3   \n\t"
-    "psrld          $5, %%mm0   \n\t"
-    "pslld         $11, %%mm3   \n\t"
-    "por         %%mm3, %%mm0   \n\t"
-    MOVNTQ"      %%mm0, (%0)    \n\t"
-    "add           $16,  %1     \n\t"
-    "add            $8,  %0     \n\t"
-    "2:                         \n\t"
-    "cmp            %2,  %1     \n\t"
-    " jb            1b          \n\t"
-    : "+r" (d), "+r"(s)
-    : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
-    );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq    %0, %%mm7    \n\t"
-        "movq    %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         4%1, %%mm3    \n\t"
-        "punpckldq    8%1, %%mm0    \n\t"
-        "punpckldq   12%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psrlq         $3, %%mm0    \n\t"
-        "psrlq         $3, %%mm3    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm3    \n\t"
-        "psrlq         $5, %%mm1    \n\t"
-        "psrlq         $5, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq         $8, %%mm2    \n\t"
-        "psrlq         $8, %%mm5    \n\t"
-        "pand       %%mm7, %%mm2    \n\t"
-        "pand       %%mm7, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
-    }
-}
-
-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 15;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         4%1, %%mm3    \n\t"
-        "punpckldq    8%1, %%mm0    \n\t"
-        "punpckldq   12%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psllq         $8, %%mm0    \n\t"
-        "psllq         $8, %%mm3    \n\t"
-        "pand       %%mm7, %%mm0    \n\t"
-        "pand       %%mm7, %%mm3    \n\t"
-        "psrlq         $5, %%mm1    \n\t"
-        "psrlq         $5, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq        $19, %%mm2    \n\t"
-        "psrlq        $19, %%mm5    \n\t"
-        "pand          %2, %%mm2    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
-    }
-}
-
-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
-    __asm__ volatile(
-    "movq           %3, %%mm5   \n\t"
-    "movq           %4, %%mm6   \n\t"
-    "movq           %5, %%mm7   \n\t"
-    "jmp            2f          \n\t"
-    ASMALIGN(4)
-    "1:                         \n\t"
-    PREFETCH"   32(%1)          \n\t"
-    "movd         (%1), %%mm0   \n\t"
-    "movd        4(%1), %%mm3   \n\t"
-    "punpckldq   8(%1), %%mm0   \n\t"
-    "punpckldq  12(%1), %%mm3   \n\t"
-    "movq        %%mm0, %%mm1   \n\t"
-    "movq        %%mm3, %%mm4   \n\t"
-    "pand        %%mm6, %%mm0   \n\t"
-    "pand        %%mm6, %%mm3   \n\t"
-    "pmaddwd     %%mm7, %%mm0   \n\t"
-    "pmaddwd     %%mm7, %%mm3   \n\t"
-    "pand        %%mm5, %%mm1   \n\t"
-    "pand        %%mm5, %%mm4   \n\t"
-    "por         %%mm1, %%mm0   \n\t"
-    "por         %%mm4, %%mm3   \n\t"
-    "psrld          $6, %%mm0   \n\t"
-    "pslld         $10, %%mm3   \n\t"
-    "por         %%mm3, %%mm0   \n\t"
-    MOVNTQ"      %%mm0, (%0)    \n\t"
-    "add           $16,  %1     \n\t"
-    "add            $8,  %0     \n\t"
-    "2:                         \n\t"
-    "cmp            %2,  %1     \n\t"
-    " jb            1b          \n\t"
-    : "+r" (d), "+r"(s)
-    : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
-    );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         4%1, %%mm3    \n\t"
-        "punpckldq    8%1, %%mm0    \n\t"
-        "punpckldq   12%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psrlq         $3, %%mm0    \n\t"
-        "psrlq         $3, %%mm3    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm3    \n\t"
-        "psrlq         $6, %%mm1    \n\t"
-        "psrlq         $6, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq         $9, %%mm2    \n\t"
-        "psrlq         $9, %%mm5    \n\t"
-        "pand       %%mm7, %%mm2    \n\t"
-        "pand       %%mm7, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
-    }
-}
-
-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 15;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         4%1, %%mm3    \n\t"
-        "punpckldq    8%1, %%mm0    \n\t"
-        "punpckldq   12%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psllq         $7, %%mm0    \n\t"
-        "psllq         $7, %%mm3    \n\t"
-        "pand       %%mm7, %%mm0    \n\t"
-        "pand       %%mm7, %%mm3    \n\t"
-        "psrlq         $6, %%mm1    \n\t"
-        "psrlq         $6, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq        $19, %%mm2    \n\t"
-        "psrlq        $19, %%mm5    \n\t"
-        "pand          %2, %%mm2    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
-    }
-}
-
-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 11;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         3%1, %%mm3    \n\t"
-        "punpckldq    6%1, %%mm0    \n\t"
-        "punpckldq    9%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psrlq         $3, %%mm0    \n\t"
-        "psrlq         $3, %%mm3    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm3    \n\t"
-        "psrlq         $5, %%mm1    \n\t"
-        "psrlq         $5, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq         $8, %%mm2    \n\t"
-        "psrlq         $8, %%mm5    \n\t"
-        "pand       %%mm7, %%mm2    \n\t"
-        "pand       %%mm7, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        const int b = *s++;
-        const int g = *s++;
-        const int r = *s++;
-        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
-    }
-}
-
-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    mm_end = end - 15;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         3%1, %%mm3    \n\t"
-        "punpckldq    6%1, %%mm0    \n\t"
-        "punpckldq    9%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psllq         $8, %%mm0    \n\t"
-        "psllq         $8, %%mm3    \n\t"
-        "pand       %%mm7, %%mm0    \n\t"
-        "pand       %%mm7, %%mm3    \n\t"
-        "psrlq         $5, %%mm1    \n\t"
-        "psrlq         $5, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq        $19, %%mm2    \n\t"
-        "psrlq        $19, %%mm5    \n\t"
-        "pand          %2, %%mm2    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        const int r = *s++;
-        const int g = *s++;
-        const int b = *s++;
-        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
-    }
-}
-
-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 11;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movd          %1, %%mm0    \n\t"
-        "movd         3%1, %%mm3    \n\t"
-        "punpckldq    6%1, %%mm0    \n\t"
-        "punpckldq    9%1, %%mm3    \n\t"
-        "movq       %%mm0, %%mm1    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm3, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "psrlq         $3, %%mm0    \n\t"
-        "psrlq         $3, %%mm3    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm3    \n\t"
-        "psrlq         $6, %%mm1    \n\t"
-        "psrlq         $6, %%mm4    \n\t"
-        "pand       %%mm6, %%mm1    \n\t"
-        "pand       %%mm6, %%mm4    \n\t"
-        "psrlq         $9, %%mm2    \n\t"
-        "psrlq         $9, %%mm5    \n\t"
-        "pand       %%mm7, %%mm2    \n\t"
-        "pand       %%mm7, %%mm5    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "psllq        $16, %%mm3    \n\t"
-        "por        %%mm3, %%mm0    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        const int b = *s++;
-        const int g = *s++;
-        const int r = *s++;
-        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
-    }
-}
-
-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint8_t *s = src;
-    const uint8_t *end;
-#if HAVE_MMX
-    const uint8_t *mm_end;
-#endif
-    uint16_t *d = (uint16_t *)dst;
-    end = s + src_size;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq         %0, %%mm7     \n\t"
-        "movq         %1, %%mm6     \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    mm_end = end - 15;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"   32%1            \n\t"
-        "movd         %1, %%mm0     \n\t"
-        "movd        3%1, %%mm3     \n\t"
-        "punpckldq   6%1, %%mm0     \n\t"
-        "punpckldq   9%1, %%mm3     \n\t"
-        "movq      %%mm0, %%mm1     \n\t"
-        "movq      %%mm0, %%mm2     \n\t"
-        "movq      %%mm3, %%mm4     \n\t"
-        "movq      %%mm3, %%mm5     \n\t"
-        "psllq        $7, %%mm0     \n\t"
-        "psllq        $7, %%mm3     \n\t"
-        "pand      %%mm7, %%mm0     \n\t"
-        "pand      %%mm7, %%mm3     \n\t"
-        "psrlq        $6, %%mm1     \n\t"
-        "psrlq        $6, %%mm4     \n\t"
-        "pand      %%mm6, %%mm1     \n\t"
-        "pand      %%mm6, %%mm4     \n\t"
-        "psrlq       $19, %%mm2     \n\t"
-        "psrlq       $19, %%mm5     \n\t"
-        "pand         %2, %%mm2     \n\t"
-        "pand         %2, %%mm5     \n\t"
-        "por       %%mm1, %%mm0     \n\t"
-        "por       %%mm4, %%mm3     \n\t"
-        "por       %%mm2, %%mm0     \n\t"
-        "por       %%mm5, %%mm3     \n\t"
-        "psllq       $16, %%mm3     \n\t"
-        "por       %%mm3, %%mm0     \n\t"
-        MOVNTQ"    %%mm0, %0        \n\t"
-        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 12;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        const int r = *s++;
-        const int g = *s++;
-        const int b = *s++;
-        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
-    }
-}
-
-/*
-  I use less accurate approximation here by simply left-shifting the input
-  value and filling the low order bits with zeroes. This method improves PNG
-  compression but this scheme cannot reproduce white exactly, since it does
-  not generate an all-ones maximum value; the net effect is to darken the
-  image slightly.
-
-  The better method should be "left bit replication":
-
-   4 3 2 1 0
-   ---------
-   1 1 0 1 1
-
-   7 6 5 4 3  2 1 0
-   ----------------
-   1 1 0 1 1  1 1 0
-   |=======|  |===|
-       |      leftmost bits repeated to fill open bits
-       |
-   original bits
-*/
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-#if HAVE_MMX
-    const uint16_t *mm_end;
-#endif
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t*)src;
-    end = s + src_size/2;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 7;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movq          %1, %%mm0    \n\t"
-        "movq          %1, %%mm1    \n\t"
-        "movq          %1, %%mm2    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %3, %%mm1    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "psllq         $3, %%mm0    \n\t"
-        "psrlq         $2, %%mm1    \n\t"
-        "psrlq         $7, %%mm2    \n\t"
-        "movq       %%mm0, %%mm3    \n\t"
-        "movq       %%mm1, %%mm4    \n\t"
-        "movq       %%mm2, %%mm5    \n\t"
-        "punpcklwd     %5, %%mm0    \n\t"
-        "punpcklwd     %5, %%mm1    \n\t"
-        "punpcklwd     %5, %%mm2    \n\t"
-        "punpckhwd     %5, %%mm3    \n\t"
-        "punpckhwd     %5, %%mm4    \n\t"
-        "punpckhwd     %5, %%mm5    \n\t"
-        "psllq         $8, %%mm1    \n\t"
-        "psllq        $16, %%mm2    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psllq         $8, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-
-        "movq       %%mm0, %%mm6    \n\t"
-        "movq       %%mm3, %%mm7    \n\t"
-
-        "movq         8%1, %%mm0    \n\t"
-        "movq         8%1, %%mm1    \n\t"
-        "movq         8%1, %%mm2    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %3, %%mm1    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "psllq         $3, %%mm0    \n\t"
-        "psrlq         $2, %%mm1    \n\t"
-        "psrlq         $7, %%mm2    \n\t"
-        "movq       %%mm0, %%mm3    \n\t"
-        "movq       %%mm1, %%mm4    \n\t"
-        "movq       %%mm2, %%mm5    \n\t"
-        "punpcklwd     %5, %%mm0    \n\t"
-        "punpcklwd     %5, %%mm1    \n\t"
-        "punpcklwd     %5, %%mm2    \n\t"
-        "punpckhwd     %5, %%mm3    \n\t"
-        "punpckhwd     %5, %%mm4    \n\t"
-        "punpckhwd     %5, %%mm5    \n\t"
-        "psllq         $8, %%mm1    \n\t"
-        "psllq        $16, %%mm2    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psllq         $8, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-
-        :"=m"(*d)
-        :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
-        :"memory");
-        /* borrowed 32 to 24 */
-        __asm__ volatile(
-        "movq       %%mm0, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "movq       %%mm6, %%mm0    \n\t"
-        "movq       %%mm7, %%mm1    \n\t"
-
-        "movq       %%mm4, %%mm6    \n\t"
-        "movq       %%mm5, %%mm7    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm1, %%mm3    \n\t"
-
-        "psrlq         $8, %%mm2    \n\t"
-        "psrlq         $8, %%mm3    \n\t"
-        "psrlq         $8, %%mm6    \n\t"
-        "psrlq         $8, %%mm7    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm1    \n\t"
-        "pand          %2, %%mm4    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "pand          %3, %%mm2    \n\t"
-        "pand          %3, %%mm3    \n\t"
-        "pand          %3, %%mm6    \n\t"
-        "pand          %3, %%mm7    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm3, %%mm1    \n\t"
-        "por        %%mm6, %%mm4    \n\t"
-        "por        %%mm7, %%mm5    \n\t"
-
-        "movq       %%mm1, %%mm2    \n\t"
-        "movq       %%mm4, %%mm3    \n\t"
-        "psllq        $48, %%mm2    \n\t"
-        "psllq        $32, %%mm3    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "pand          %5, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psrlq        $16, %%mm1    \n\t"
-        "psrlq        $32, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm3, %%mm1    \n\t"
-        "pand          %6, %%mm5    \n\t"
-        "por        %%mm5, %%mm4    \n\t"
-
-        MOVNTQ"     %%mm0,   %0     \n\t"
-        MOVNTQ"     %%mm1,  8%0     \n\t"
-        MOVNTQ"     %%mm4, 16%0"
-
-        :"=m"(*d)
-        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
-        :"memory");
-        d += 24;
-        s += 8;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x7C00)>>7;
-    }
-}
-
-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-#if HAVE_MMX
-    const uint16_t *mm_end;
-#endif
-    uint8_t *d = (uint8_t *)dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    mm_end = end - 7;
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movq          %1, %%mm0    \n\t"
-        "movq          %1, %%mm1    \n\t"
-        "movq          %1, %%mm2    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %3, %%mm1    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "psllq         $3, %%mm0    \n\t"
-        "psrlq         $3, %%mm1    \n\t"
-        "psrlq         $8, %%mm2    \n\t"
-        "movq       %%mm0, %%mm3    \n\t"
-        "movq       %%mm1, %%mm4    \n\t"
-        "movq       %%mm2, %%mm5    \n\t"
-        "punpcklwd     %5, %%mm0    \n\t"
-        "punpcklwd     %5, %%mm1    \n\t"
-        "punpcklwd     %5, %%mm2    \n\t"
-        "punpckhwd     %5, %%mm3    \n\t"
-        "punpckhwd     %5, %%mm4    \n\t"
-        "punpckhwd     %5, %%mm5    \n\t"
-        "psllq         $8, %%mm1    \n\t"
-        "psllq        $16, %%mm2    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psllq         $8, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-
-        "movq       %%mm0, %%mm6    \n\t"
-        "movq       %%mm3, %%mm7    \n\t"
-
-        "movq         8%1, %%mm0    \n\t"
-        "movq         8%1, %%mm1    \n\t"
-        "movq         8%1, %%mm2    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %3, %%mm1    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "psllq         $3, %%mm0    \n\t"
-        "psrlq         $3, %%mm1    \n\t"
-        "psrlq         $8, %%mm2    \n\t"
-        "movq       %%mm0, %%mm3    \n\t"
-        "movq       %%mm1, %%mm4    \n\t"
-        "movq       %%mm2, %%mm5    \n\t"
-        "punpcklwd     %5, %%mm0    \n\t"
-        "punpcklwd     %5, %%mm1    \n\t"
-        "punpcklwd     %5, %%mm2    \n\t"
-        "punpckhwd     %5, %%mm3    \n\t"
-        "punpckhwd     %5, %%mm4    \n\t"
-        "punpckhwd     %5, %%mm5    \n\t"
-        "psllq         $8, %%mm1    \n\t"
-        "psllq        $16, %%mm2    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psllq         $8, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        :"=m"(*d)
-        :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
-        :"memory");
-        /* borrowed 32 to 24 */
-        __asm__ volatile(
-        "movq       %%mm0, %%mm4    \n\t"
-        "movq       %%mm3, %%mm5    \n\t"
-        "movq       %%mm6, %%mm0    \n\t"
-        "movq       %%mm7, %%mm1    \n\t"
-
-        "movq       %%mm4, %%mm6    \n\t"
-        "movq       %%mm5, %%mm7    \n\t"
-        "movq       %%mm0, %%mm2    \n\t"
-        "movq       %%mm1, %%mm3    \n\t"
-
-        "psrlq         $8, %%mm2    \n\t"
-        "psrlq         $8, %%mm3    \n\t"
-        "psrlq         $8, %%mm6    \n\t"
-        "psrlq         $8, %%mm7    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm1    \n\t"
-        "pand          %2, %%mm4    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "pand          %3, %%mm2    \n\t"
-        "pand          %3, %%mm3    \n\t"
-        "pand          %3, %%mm6    \n\t"
-        "pand          %3, %%mm7    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "por        %%mm3, %%mm1    \n\t"
-        "por        %%mm6, %%mm4    \n\t"
-        "por        %%mm7, %%mm5    \n\t"
-
-        "movq       %%mm1, %%mm2    \n\t"
-        "movq       %%mm4, %%mm3    \n\t"
-        "psllq        $48, %%mm2    \n\t"
-        "psllq        $32, %%mm3    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "pand          %5, %%mm3    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psrlq        $16, %%mm1    \n\t"
-        "psrlq        $32, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm3, %%mm1    \n\t"
-        "pand          %6, %%mm5    \n\t"
-        "por        %%mm5, %%mm4    \n\t"
-
-        MOVNTQ"     %%mm0,   %0     \n\t"
-        MOVNTQ"     %%mm1,  8%0     \n\t"
-        MOVNTQ"     %%mm4, 16%0"
-
-        :"=m"(*d)
-        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
-        :"memory");
-        d += 24;
-        s += 8;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0xF800)>>8;
-    }
-}
-
-/**
- * Expand packed 15-bit pixels to 32 bits per pixel.
- *
- * Every 16-bit source word is split into three 5-bit fields (bits 0-4, 5-9
- * and 10-14). Each field is moved to the top five bits of its own output
- * byte and a fourth byte is set to 255. Little-endian builds store the bytes
- * as: low field, middle field, high field, 255; WORDS_BIGENDIAN builds store
- * the same four bytes in the opposite order.
- *
- * @param src      source pixels, read as uint16_t words
- * @param dst      destination, receives 4 bytes per source pixel
- * @param src_size source size in bytes (src_size/2 pixels are converted)
- */
-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-#if HAVE_MMX
-    const uint16_t *mm_end;
-#endif
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
-    end = s + src_size/2;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory"); // mm7 = 0, widens words to dwords
-    /* mm6 = 0xFF000000 in each dword: the constant fourth byte. Without it the
-     * MMX loop stored 0 there while the scalar loop below stores 255. */
-    __asm__ volatile(
-    "pcmpeqd    %%mm6, %%mm6    \n\t"
-    "pslld        $24, %%mm6    \n\t"
-    :::"memory");
-    mm_end = end - 3; // the loop below consumes 4 pixels per iteration
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movq          %1, %%mm0    \n\t"
-        "movq          %1, %%mm1    \n\t"
-        "movq          %1, %%mm2    \n\t"
-        "pand          %2, %%mm0    \n\t" // isolate one field per register
-        "pand          %3, %%mm1    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "psllq         $3, %%mm0    \n\t" // move each field to bits 3-7 of its word
-        "psrlq         $2, %%mm1    \n\t"
-        "psrlq         $7, %%mm2    \n\t"
-        "movq       %%mm0, %%mm3    \n\t"
-        "movq       %%mm1, %%mm4    \n\t"
-        "movq       %%mm2, %%mm5    \n\t"
-        "punpcklwd  %%mm7, %%mm0    \n\t" // pixels 0-1 as dwords
-        "punpcklwd  %%mm7, %%mm1    \n\t"
-        "punpcklwd  %%mm7, %%mm2    \n\t"
-        "punpckhwd  %%mm7, %%mm3    \n\t" // pixels 2-3 as dwords
-        "punpckhwd  %%mm7, %%mm4    \n\t"
-        "punpckhwd  %%mm7, %%mm5    \n\t"
-        "psllq         $8, %%mm1    \n\t"
-        "psllq        $16, %%mm2    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psllq         $8, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "por        %%mm6, %%mm0    \n\t" // fourth byte = 255, as in the scalar loop
-        "por        %%mm6, %%mm3    \n\t"
-        MOVNTQ"     %%mm0,  %0      \n\t"
-        MOVNTQ"     %%mm3, 8%0      \n\t"
-        :"=m"(*d)
-        :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
-        :"memory");
-        d += 16;
-        s += 4;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    /* Scalar loop: whole input without MMX, otherwise the last 0-3 pixels. */
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-#ifdef WORDS_BIGENDIAN
-        *d++ = 255;
-        *d++ = (bgr&0x7C00)>>7;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x1F)<<3;
-#else
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x7C00)>>7;
-        *d++ = 255;
-#endif
-    }
-}
-
-/**
- * Expand packed 16-bit pixels to 32 bits per pixel.
- *
- * Every 16-bit source word is split into a 5-bit field (bits 0-4), a 6-bit
- * field (bits 5-10) and a 5-bit field (bits 11-15). Each field is moved to
- * the top bits of its own output byte and a fourth byte is set to 255.
- * Little-endian builds store the bytes as: low field, middle field, high
- * field, 255; WORDS_BIGENDIAN builds store them in the opposite order.
- *
- * @param src      source pixels, read as uint16_t words
- * @param dst      destination, receives 4 bytes per source pixel
- * @param src_size source size in bytes (src_size/2 pixels are converted)
- */
-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    const uint16_t *end;
-#if HAVE_MMX
-    const uint16_t *mm_end;
-#endif
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t*)src;
-    end = s + src_size/2;
-#if HAVE_MMX
-    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
-    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory"); // mm7 = 0, widens words to dwords
-    /* mm6 = 0xFF000000 in each dword: the constant fourth byte. Without it the
-     * MMX loop stored 0 there while the scalar loop below stores 255. */
-    __asm__ volatile(
-    "pcmpeqd    %%mm6, %%mm6    \n\t"
-    "pslld        $24, %%mm6    \n\t"
-    :::"memory");
-    mm_end = end - 3; // the loop below consumes 4 pixels per iteration
-    while (s < mm_end)
-    {
-        __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movq          %1, %%mm0    \n\t"
-        "movq          %1, %%mm1    \n\t"
-        "movq          %1, %%mm2    \n\t"
-        "pand          %2, %%mm0    \n\t" // isolate one field per register
-        "pand          %3, %%mm1    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "psllq         $3, %%mm0    \n\t" // move each field to the top of the low byte
-        "psrlq         $3, %%mm1    \n\t"
-        "psrlq         $8, %%mm2    \n\t"
-        "movq       %%mm0, %%mm3    \n\t"
-        "movq       %%mm1, %%mm4    \n\t"
-        "movq       %%mm2, %%mm5    \n\t"
-        "punpcklwd  %%mm7, %%mm0    \n\t" // pixels 0-1 as dwords
-        "punpcklwd  %%mm7, %%mm1    \n\t"
-        "punpcklwd  %%mm7, %%mm2    \n\t"
-        "punpckhwd  %%mm7, %%mm3    \n\t" // pixels 2-3 as dwords
-        "punpckhwd  %%mm7, %%mm4    \n\t"
-        "punpckhwd  %%mm7, %%mm5    \n\t"
-        "psllq         $8, %%mm1    \n\t"
-        "psllq        $16, %%mm2    \n\t"
-        "por        %%mm1, %%mm0    \n\t"
-        "por        %%mm2, %%mm0    \n\t"
-        "psllq         $8, %%mm4    \n\t"
-        "psllq        $16, %%mm5    \n\t"
-        "por        %%mm4, %%mm3    \n\t"
-        "por        %%mm5, %%mm3    \n\t"
-        "por        %%mm6, %%mm0    \n\t" // fourth byte = 255, as in the scalar loop
-        "por        %%mm6, %%mm3    \n\t"
-        MOVNTQ"     %%mm0, %0       \n\t"
-        MOVNTQ"     %%mm3, 8%0      \n\t"
-        :"=m"(*d)
-        :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
-        :"memory");
-        d += 16;
-        s += 4;
-    }
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    /* Scalar loop: whole input without MMX, otherwise the last 0-3 pixels. */
-    while (s < end)
-    {
-        register uint16_t bgr;
-        bgr = *s++;
-#ifdef WORDS_BIGENDIAN
-        *d++ = 255;
-        *d++ = (bgr&0xF800)>>8;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0x1F)<<3;
-#else
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0xF800)>>8;
-        *d++ = 255;
-#endif
-    }
-}
-
-/**
- * Swap the two colour components stored in bits 0-7 and bits 16-23 of every
- * 32-bit pixel; bits 8-15 and 24-31 are copied unchanged.
- *
- * @param src      source pixels, 4 bytes each
- * @param dst      destination, same size as the source
- * @param src_size size of the source in bytes
- */
-static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    /* idx is a biased index: s[idx] is src[0] at the start and idx == 15
-     * corresponds to the end of the input. It is negative exactly while at
-     * least 16 bytes remain, which is what the MMX loop tests with js/jns. */
-    long idx = 15 - src_size;
-    const uint8_t *s = src-idx;
-    uint8_t *d = dst-idx;
-#if HAVE_MMX
-    __asm__ volatile(
-    "test          %0, %0           \n\t"
-    "jns           2f               \n\t" // fewer than 16 bytes: skip MMX entirely
-    PREFETCH"       (%1, %0)        \n\t"
-    "movq          %3, %%mm7        \n\t"
-    "pxor          %4, %%mm7        \n\t"
-    "movq       %%mm7, %%mm6        \n\t" // mm6 = mask32b ^ mask32r (presumably the two swapped bytes)
-    "pxor          %5, %%mm7        \n\t" // mm7 = mm6 ^ mmx_one (presumably the two bytes kept in place)
-    ASMALIGN(4)
-    "1:                             \n\t"
-    PREFETCH"     32(%1, %0)        \n\t"
-    "movq           (%1, %0), %%mm0 \n\t"
-    "movq          8(%1, %0), %%mm1 \n\t"
-# if HAVE_MMX2
-    "pshufw      $177, %%mm0, %%mm3 \n\t" // swap the 16-bit halves of each dword
-    "pshufw      $177, %%mm1, %%mm5 \n\t"
-    "pand       %%mm7, %%mm0        \n\t"
-    "pand       %%mm6, %%mm3        \n\t"
-    "pand       %%mm7, %%mm1        \n\t"
-    "pand       %%mm6, %%mm5        \n\t"
-    "por        %%mm3, %%mm0        \n\t"
-    "por        %%mm5, %%mm1        \n\t"
-# else
-    "movq       %%mm0, %%mm2        \n\t"
-    "movq       %%mm1, %%mm4        \n\t"
-    "pand       %%mm7, %%mm0        \n\t"
-    "pand       %%mm6, %%mm2        \n\t"
-    "pand       %%mm7, %%mm1        \n\t"
-    "pand       %%mm6, %%mm4        \n\t"
-    "movq       %%mm2, %%mm3        \n\t"
-    "movq       %%mm4, %%mm5        \n\t"
-    "pslld        $16, %%mm2        \n\t"
-    "psrld        $16, %%mm3        \n\t"
-    "pslld        $16, %%mm4        \n\t"
-    "psrld        $16, %%mm5        \n\t"
-    "por        %%mm2, %%mm0        \n\t"
-    "por        %%mm4, %%mm1        \n\t"
-    "por        %%mm3, %%mm0        \n\t"
-    "por        %%mm5, %%mm1        \n\t"
-# endif
-    MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
-    MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
-    "add          $16, %0           \n\t"
-    "js            1b               \n\t"
-    SFENCE"                         \n\t"
-    EMMS"                           \n\t"
-    "2:                             \n\t"
-    : "+&r"(idx)
-    : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
-    : "memory");
-#endif
-    /* Scalar loop: whole input without MMX, otherwise the last 0-3 pixels.
-     * The pixel is kept in an unsigned 32-bit variable: with a signed int,
-     * v<<16 would shift set bits past the sign bit, which is undefined. */
-    for (; idx<15; idx+=4) {
-        register uint32_t v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
-        v &= 0xff00ff;
-        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
-    }
-}
-
-/**
- * Swap the first and third byte of every 3-byte pixel; the middle byte is
- * copied unchanged.
- *
- * @param src      source pixels, 3 bytes each
- * @param dst      destination, same size as the source
- * @param src_size size of the source in bytes
- */
-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
-{
-    long i; // same type as src_size, so the comparison below is never mixed-sign
-#if HAVE_MMX
-    /* Biased index: negative exactly while at least 24 bytes (8 pixels)
-     * remain; each loop iteration converts 24 bytes. */
-    long mmx_size= 23 - src_size;
-    __asm__ volatile (
-    "test             %%"REG_a", %%"REG_a"          \n\t"
-    "jns                     2f                     \n\t"
-    "movq     "MANGLE(mask24r)", %%mm5              \n\t"
-    "movq     "MANGLE(mask24g)", %%mm6              \n\t"
-    "movq     "MANGLE(mask24b)", %%mm7              \n\t"
-    ASMALIGN(4)
-    "1:                                             \n\t"
-    PREFETCH" 32(%1, %%"REG_a")                     \n\t"
-    "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-    "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
-    "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
-    "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
-    "pand                 %%mm5, %%mm0              \n\t"
-    "pand                 %%mm6, %%mm1              \n\t"
-    "pand                 %%mm7, %%mm2              \n\t"
-    "por                  %%mm0, %%mm1              \n\t"
-    "por                  %%mm2, %%mm1              \n\t"
-    "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-    MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
-    "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
-    "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
-    "pand                 %%mm7, %%mm0              \n\t"
-    "pand                 %%mm5, %%mm1              \n\t"
-    "pand                 %%mm6, %%mm2              \n\t"
-    "por                  %%mm0, %%mm1              \n\t"
-    "por                  %%mm2, %%mm1              \n\t"
-    "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
-    MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
-    "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
-    "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
-    "pand                 %%mm6, %%mm0              \n\t"
-    "pand                 %%mm7, %%mm1              \n\t"
-    "pand                 %%mm5, %%mm2              \n\t"
-    "por                  %%mm0, %%mm1              \n\t"
-    "por                  %%mm2, %%mm1              \n\t"
-    MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
-    "add                    $24, %%"REG_a"          \n\t"
-    " js                     1b                     \n\t"
-    "2:                                             \n\t"
-    : "+a" (mmx_size)
-    : "r" (src-mmx_size), "r"(dst-mmx_size)
-    : "memory" // src is read and dst written through plain register operands
-    );
-
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-
-    if (mmx_size==23) return; //finished, was multiple of 24
-
-    /* 23-mmx_size bytes are left over; point src/dst at them for the scalar loop. */
-    src+= src_size;
-    dst+= src_size;
-    src_size= 23-mmx_size;
-    src-= src_size;
-    dst-= src_size;
-#endif
-    for (i=0; i<src_size; i+=3)
-    {
-        register uint8_t x;
-        x          = src[i + 2]; // read first so that src == dst also works
-        dst[i + 1] = src[i + 1];
-        dst[i + 2] = src[i + 0];
-        dst[i + 0] = x;
-    }
-}
-
-/**
- * Interleave three planes into one packed plane with the byte order
- * Y0 U Y1 V for every pair of luma samples.
- *
- * @param ysrc             luma plane
- * @param usrc             first chroma plane (one sample per two luma samples)
- * @param vsrc             second chroma plane
- * @param dst              packed destination, 2 bytes per luma sample
- * @param width            luma samples per line; width>>1 chroma samples are used
- * @param height           number of luma lines
- * @param lumStride        byte distance between luma lines
- * @param chromStride      byte distance between chroma lines
- * @param dstStride        byte distance between destination lines
- * @param vertLumPerChroma luma lines sharing one chroma line; the test used
- *                         below is a bit mask, so this must be a power of two
- */
-static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
-{
-    long y;
-    const long chromWidth= width>>1;
-    for (y=0; y<height; y++)
-    {
-#if HAVE_MMX
-//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
-        __asm__ volatile(
-        "xor                 %%"REG_a", %%"REG_a"   \n\t" // REG_a = chroma sample index
-        ASMALIGN(4)
-        "1:                                         \n\t"
-        PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
-        PREFETCH"    32(%2, %%"REG_a")              \n\t"
-        PREFETCH"    32(%3, %%"REG_a")              \n\t"
-        "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
-        "movq                    %%mm0, %%mm2       \n\t" // U(0)
-        "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
-        "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
-        "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
-
-        "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
-        "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
-        "movq                    %%mm3, %%mm4       \n\t" // Y(0)
-        "movq                    %%mm5, %%mm6       \n\t" // Y(8)
-        "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
-        "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
-        "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
-        "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
-
-        MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
-        MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
-        MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
-        MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
-
-        "add                        $8, %%"REG_a"   \n\t"
-        "cmp                        %4, %%"REG_a"   \n\t"
-        " jb                        1b              \n\t"
-        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-        : "memory", "%"REG_a // memory: planes are accessed through register pointers
-        );
-#else
-
-#if ARCH_ALPHA && HAVE_MVI
-#define pl2yuy2(n)                  \
-    y1 = yc[n];                     \
-    y2 = yc2[n];                    \
-    u = uc[n];                      \
-    v = vc[n];                      \
-    __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1));  \
-    __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2));  \
-    __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u));    \
-    __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v));    \
-    yuv1 = (u << 8) + (v << 24);                \
-    yuv2 = yuv1 + y2;               \
-    yuv1 += y1;                     \
-    qdst[n]  = yuv1;                \
-    qdst2[n] = yuv2;
-
-        /* This variant converts two luma lines per outer iteration, both
-         * with the same chroma line (see the extra y++ below). */
-        int i;
-        uint64_t *qdst = (uint64_t *) dst;
-        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
-        const uint32_t *yc = (uint32_t *) ysrc;
-        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
-        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
-        for (i = 0; i < chromWidth; i += 8){
-            uint64_t y1, y2, yuv1, yuv2;
-            uint64_t u, v;
-            /* Prefetch */
-            __asm__("ldq $31,64(%0)" :: "r"(yc));
-            __asm__("ldq $31,64(%0)" :: "r"(yc2));
-            __asm__("ldq $31,64(%0)" :: "r"(uc));
-            __asm__("ldq $31,64(%0)" :: "r"(vc));
-
-            pl2yuy2(0);
-            pl2yuy2(1);
-            pl2yuy2(2);
-            pl2yuy2(3);
-
-            yc    += 4;
-            yc2   += 4;
-            uc    += 4;
-            vc    += 4;
-            qdst  += 4;
-            qdst2 += 4;
-        }
-        y++;
-        ysrc += lumStride;
-        dst += dstStride;
-
-#elif HAVE_FAST_64BIT
-        int i;
-        uint64_t *ldst = (uint64_t *) dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-        for (i = 0; i < chromWidth; i += 2){
-            uint64_t k, l;
-            /* The byte shifted by 24 must be unsigned: as a signed int it
-             * turns negative for values >= 128 and sign-extends into the
-             * upper 32 bits of k, corrupting the pixels taken from l. */
-            k = yc[0] + (uc[0] << 8) +
-                (yc[1] << 16) + ((unsigned) vc[0] << 24);
-            l = yc[2] + (uc[1] << 8) +
-                (yc[3] << 16) + ((unsigned) vc[1] << 24);
-            *ldst++ = k + (l << 32);
-            yc += 4;
-            uc += 2;
-            vc += 2;
-        }
-
-#else
-        int i, *idst = (int32_t *) dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-        for (i = 0; i < chromWidth; i++){
-            /* unsigned casts: shifting a byte >= 128 by 24 overflows a signed int */
-#ifdef WORDS_BIGENDIAN
-            *idst++ = ((unsigned) yc[0] << 24)+ (uc[0] << 16) +
-                (yc[1] << 8) + (vc[0] << 0);
-#else
-            *idst++ = yc[0] + (uc[0] << 8) +
-                (yc[1] << 16) + ((unsigned) vc[0] << 24);
-#endif
-            yc += 2;
-            uc++;
-            vc++;
-        }
-#endif
-#endif
-        /* advance chroma only after the last luma line of each group */
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
-        {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst  += dstStride;
-    }
-#if HAVE_MMX
-__asm__(    EMMS"       \n\t"
-        SFENCE"     \n\t"
-        :::"memory");
-#endif
-}
-
-/**
- * Thin wrapper: forwards every argument to yuvPlanartoyuy2() and passes 2 as
- * vertLumPerChroma, so each chroma line is used for two consecutive luma
- * lines.
- *
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
-{
-    //FIXME interpolate chroma
-    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); // 2 = luma lines per chroma line
-}
-
-/**
- * Interleave three planes into one packed plane with the byte order
- * U Y0 V Y1 for every pair of luma samples.
- *
- * @param ysrc             luma plane
- * @param usrc             first chroma plane (one sample per two luma samples)
- * @param vsrc             second chroma plane
- * @param dst              packed destination, 2 bytes per luma sample
- * @param width            luma samples per line; width>>1 chroma samples are used
- * @param height           number of luma lines
- * @param lumStride        byte distance between luma lines
- * @param chromStride      byte distance between chroma lines
- * @param dstStride        byte distance between destination lines
- * @param vertLumPerChroma luma lines sharing one chroma line; the test used
- *                         below is a bit mask, so this must be a power of two
- */
-static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
-{
-    long y;
-    const long chromWidth= width>>1;
-    for (y=0; y<height; y++)
-    {
-#if HAVE_MMX
-//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
-        __asm__ volatile(
-        "xor                %%"REG_a", %%"REG_a"    \n\t" // REG_a = chroma sample index
-        ASMALIGN(4)
-        "1:                                         \n\t"
-        PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
-        PREFETCH"   32(%2, %%"REG_a")               \n\t"
-        PREFETCH"   32(%3, %%"REG_a")               \n\t"
-        "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
-        "movq                   %%mm0, %%mm2        \n\t" // U(0)
-        "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
-        "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
-        "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
-
-        "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
-        "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
-        "movq                   %%mm0, %%mm4        \n\t" // UVUV UVUV(0)
-        "movq                   %%mm2, %%mm6        \n\t" // UVUV UVUV(8)
-        "punpcklbw              %%mm3, %%mm0        \n\t" // UYVY UYVY(0)
-        "punpckhbw              %%mm3, %%mm4        \n\t" // UYVY UYVY(4)
-        "punpcklbw              %%mm5, %%mm2        \n\t" // UYVY UYVY(8)
-        "punpckhbw              %%mm5, %%mm6        \n\t" // UYVY UYVY(12)
-
-        MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
-        MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
-        MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
-        MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
-
-        "add                       $8, %%"REG_a"    \n\t"
-        "cmp                       %4, %%"REG_a"    \n\t"
-        " jb                       1b               \n\t"
-        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-        : "memory", "%"REG_a // memory: planes are accessed through register pointers
-        );
-#else
-//FIXME adapt the Alpha ASM code from yv12->yuy2
-
-#if HAVE_FAST_64BIT
-        int i;
-        uint64_t *ldst = (uint64_t *) dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-        for (i = 0; i < chromWidth; i += 2){
-            uint64_t k, l;
-            /* The byte shifted by 24 must be unsigned: as a signed int it
-             * turns negative for values >= 128 and sign-extends into the
-             * upper 32 bits of k, corrupting the pixels taken from l. */
-            k = uc[0] + (yc[0] << 8) +
-                (vc[0] << 16) + ((unsigned) yc[1] << 24);
-            l = uc[1] + (yc[2] << 8) +
-                (vc[1] << 16) + ((unsigned) yc[3] << 24);
-            *ldst++ = k + (l << 32);
-            yc += 4;
-            uc += 2;
-            vc += 2;
-        }
-
-#else
-        int i, *idst = (int32_t *) dst;
-        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-        for (i = 0; i < chromWidth; i++){
-            /* unsigned casts: shifting a byte >= 128 by 24 overflows a signed int */
-#ifdef WORDS_BIGENDIAN
-            *idst++ = ((unsigned) uc[0] << 24)+ (yc[0] << 16) +
-                (vc[0] << 8) + (yc[1] << 0);
-#else
-            *idst++ = uc[0] + (yc[0] << 8) +
-               (vc[0] << 16) + ((unsigned) yc[1] << 24);
-#endif
-            yc += 2;
-            uc++;
-            vc++;
-        }
-#endif
-#endif
-        /* advance chroma only after the last luma line of each group */
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
-        {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst += dstStride;
-    }
-#if HAVE_MMX
-__asm__(    EMMS"       \n\t"
-        SFENCE"     \n\t"
-        :::"memory");
-#endif
-}
-
-/**
- * Thin wrapper: forwards every argument to yuvPlanartouyvy() and passes 2 as
- * vertLumPerChroma, so each chroma line is used for two consecutive luma
- * lines.
- *
- * Height should be a multiple of 2 and width should be a multiple of 16
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
-{
-    //FIXME interpolate chroma
-    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); // 2 = luma lines per chroma line
-}
-
-/**
- * Thin wrapper: forwards every argument to yuvPlanartouyvy() and passes 1 as
- * vertLumPerChroma, so the chroma pointers advance after every luma line.
- *
- * Width should be a multiple of 16.
- */
-static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
-{
-    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); // 1 = one chroma line per luma line
-}
-
-/**
- * Thin wrapper: forwards every argument to yuvPlanartoyuy2() and passes 1 as
- * vertLumPerChroma, so the chroma pointers advance after every luma line.
- *
- * Width should be a multiple of 16.
- */
-static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
-{
-    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); // 1 = one chroma line per luma line
-}
-
-/**
- * Split a packed plane with the byte order Y0 U Y1 V into separate luma and
- * chroma planes. Source lines are processed in pairs: the first line of a
- * pair supplies luma and chroma, the second line supplies luma only (its
- * chroma bytes are ignored), so one chroma line is written per two luma
- * lines.
- *
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- */
-static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
-{
-    long y;
-    const long chromWidth= width>>1; // chroma samples per line
-    for (y=0; y<height; y+=2)
-    {
-#if HAVE_MMX
-        __asm__ volatile(
-        "xor                 %%"REG_a", %%"REG_a"   \n\t" // REG_a = chroma sample index
-        "pcmpeqw                 %%mm7, %%mm7       \n\t"
-        "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
-        ASMALIGN(4)
-        "1:                \n\t"
-        PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-        "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-        "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-        "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
-        "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
-        "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
-        "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
-        "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
-        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
-        "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
-        "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
-
-        MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t" // store luma 0-7
-
-        "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
-        "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
-        "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
-        "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
-        "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
-        "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
-        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
-        "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
-        "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
-        "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
-
-        MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t" // store luma 8-15
-
-        "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
-        "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
-        "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
-        "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
-        "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
-        "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
-        "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
-        "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
-
-        MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t" // 8 samples to vdst
-        MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t" // 8 samples to udst
-
-        "add                        $8, %%"REG_a"   \n\t"
-        "cmp                        %4, %%"REG_a"   \n\t"
-        " jb                        1b              \n\t"
-        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-        : "memory", "%"REG_a
-        );
-
-        ydst += lumStride; // move on to the second line of the pair
-        src  += srcStride;
-
-        __asm__ volatile( // second line: luma only, mm7 still holds the FF,00 mask
-        "xor                 %%"REG_a", %%"REG_a"   \n\t"
-        ASMALIGN(4)
-        "1:                                         \n\t"
-        PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-        "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-        "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-        "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
-        "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
-        "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
-        "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
-        "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
-        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
-        "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
-        "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
-
-        MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
-        MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
-
-        "add                        $8, %%"REG_a"   \n\t"
-        "cmp                        %4, %%"REG_a"   \n\t"
-        " jb                        1b              \n\t"
-
-        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-        : "memory", "%"REG_a
-        );
-#else
-        long i;
-        for (i=0; i<chromWidth; i++) // first line of the pair: luma and chroma
-        {
-            ydst[2*i+0]     = src[4*i+0];
-            udst[i]     = src[4*i+1];
-            ydst[2*i+1]     = src[4*i+2];
-            vdst[i]     = src[4*i+3];
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i=0; i<chromWidth; i++) // second line of the pair: luma only
-        {
-            ydst[2*i+0]     = src[4*i+0];
-            ydst[2*i+1]     = src[4*i+2];
-        }
-#endif
-        udst += chromStride; // one chroma line written per pair of source lines
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-#if HAVE_MMX
-__asm__ volatile(   EMMS"       \n\t"
-                SFENCE"     \n\t"
-                :::"memory");
-#endif
-}
-
-static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
-                                      uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height, long lumStride, long chromStride)
-{
-    /* Y Plane: copied as one contiguous block of width*height bytes.
-     * lumStride is not consulted here. NOTE(review): that is only right when
-     * both luma planes are stored without row padding -- confirm with callers. */
-    memcpy(ydst, ysrc, width*height);
-
-    /* XXX: implement upscaling for U,V
-     * Until then usrc, vsrc, udst, vdst and chromStride are unused and this
-     * function writes nothing to the destination chroma planes. */
-}
-
-/**
- * Upscale one 8-bit plane by a factor of two horizontally and vertically.
- *
- * The first and the last destination line are interpolated horizontally only,
- * from the first and last source line. Every other pair of destination lines
- * is built from two neighbouring source lines; each output sample is a 3:1
- * weighted mix of two source samples.
- *
- * @param src       source plane
- * @param dst       destination plane, 2*srcWidth x 2*srcHeight samples
- * @param srcWidth  source width in samples
- * @param srcHeight source height in lines
- * @param srcStride byte distance between source lines
- * @param dstStride byte distance between destination lines
- */
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
-{
-    long x,y;
-
-    dst[0]= src[0];
-
-    // first line
-    for (x=0; x<srcWidth-1; x++){
-        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
-        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
-    }
-    dst[2*srcWidth-1]= src[srcWidth-1];
-
-    dst+= dstStride;
-
-    for (y=1; y<srcHeight; y++){
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-        /* Number of leading samples handled by the SIMD loop (multiple of 16). */
-        const long simdSize= srcWidth&~15;
-        /* The scalar loop below starts at mmxSize-1. When the line is too
-         * short for the SIMD loop it has to start at 0; starting at -1 would
-         * read src[-1] and write dst[-1]. */
-        const long mmxSize= simdSize ? simdSize : 1;
-        /* The asm loop always executes at least once, so skip it completely
-         * when there is no SIMD work instead of letting it touch 16 bytes of
-         * a line that may be shorter than that. */
-        if (simdSize) {
-        __asm__ volatile(
-        "mov           %4, %%"REG_a"            \n\t" // REG_a runs from -simdSize up to 0
-        "1:                                     \n\t"
-        "movq         (%0, %%"REG_a"), %%mm0    \n\t"
-        "movq         (%1, %%"REG_a"), %%mm1    \n\t"
-        "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
-        "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
-        "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
-        "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
-        PAVGB"                  %%mm0, %%mm5    \n\t" // averaging twice with the same
-        PAVGB"                  %%mm0, %%mm3    \n\t" // operand weights it roughly 3:1
-        PAVGB"                  %%mm0, %%mm5    \n\t"
-        PAVGB"                  %%mm0, %%mm3    \n\t"
-        PAVGB"                  %%mm1, %%mm4    \n\t"
-        PAVGB"                  %%mm1, %%mm2    \n\t"
-        PAVGB"                  %%mm1, %%mm4    \n\t"
-        PAVGB"                  %%mm1, %%mm2    \n\t"
-        "movq                   %%mm5, %%mm7    \n\t"
-        "movq                   %%mm4, %%mm6    \n\t"
-        "punpcklbw              %%mm3, %%mm5    \n\t"
-        "punpckhbw              %%mm3, %%mm7    \n\t"
-        "punpcklbw              %%mm2, %%mm4    \n\t"
-        "punpckhbw              %%mm2, %%mm6    \n\t"
-        MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-        MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-        MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-        MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-        "add                       $8, %%"REG_a"            \n\t"
-        " js                       1b                       \n\t"
-        :: "r" (src + simdSize  ), "r" (src + srcStride + simdSize  ),
-           "r" (dst + simdSize*2), "r" (dst + dstStride + simdSize*2),
-           "g" (-simdSize)
-        : "memory", "%"REG_a // memory: both planes are accessed through register pointers
-
-        );
-        }
-#else
-        const long mmxSize=1;
-#endif
-        /* Left edge: vertical interpolation only. This also replaces the
-         * first sample of the SIMD output, which was computed from src[-1]. */
-        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
-        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
-
-        for (x=mmxSize-1; x<srcWidth-1; x++){
-            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
-            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
-            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
-            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
-        }
-        /* Right edge: vertical interpolation only. */
-        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
-        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
-
-        dst+=dstStride*2;
-        src+=srcStride;
-    }
-
-    // last line
-    dst[0]= src[0];
-
-    for (x=0; x<srcWidth-1; x++){
-        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
-        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
-    }
-    dst[2*srcWidth-1]= src[srcWidth-1];
-
-#if HAVE_MMX
-__asm__ volatile(   EMMS"       \n\t"
-                SFENCE"     \n\t"
-                :::"memory");
-#endif
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line, others are ignored.
- * FIXME: Write HQ version.
- */
-static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
-{
-    long y;
-    const long chromWidth= width>>1;
-    for (y=0; y<height; y+=2)
-    {
-#if HAVE_MMX
-        __asm__ volatile(
-        "xor                 %%"REG_a", %%"REG_a"   \n\t"
-        "pcmpeqw             %%mm7, %%mm7   \n\t"
-        "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
-        ASMALIGN(4)
-        "1:                                 \n\t"
-        PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-        "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
-        "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
-        "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
-        "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
-        "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
-        "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
-        "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
-        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
-        "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
-        "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
-
-        MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
-
-        "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
-        "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
-        "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
-        "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
-        "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
-        "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
-        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
-        "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
-        "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
-        "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
-
-        MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
-
-        "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
-        "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
-        "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
-        "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
-        "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
-        "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
-        "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
-        "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
-
-        MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
-        MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
-
-        "add                    $8, %%"REG_a"   \n\t"
-        "cmp                    %4, %%"REG_a"   \n\t"
-        " jb                    1b          \n\t"
-        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-        : "memory", "%"REG_a
-        );
-
-        ydst += lumStride;
-        src  += srcStride;
-
-        __asm__ volatile(
-        "xor                 %%"REG_a", %%"REG_a"   \n\t"
-        ASMALIGN(4)
-        "1:                                 \n\t"
-        PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-        "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // YUYV YUYV(0)
-        "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // YUYV YUYV(4)
-        "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // YUYV YUYV(8)
-        "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // YUYV YUYV(12)
-        "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
-        "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
-        "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
-        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
-        "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
-        "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
-
-        MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
-        MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
-
-        "add                    $8, %%"REG_a"   \n\t"
-        "cmp                    %4, %%"REG_a"   \n\t"
-        " jb                    1b          \n\t"
-
-        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-        : "memory", "%"REG_a
-        );
-#else
-        long i;
-        for (i=0; i<chromWidth; i++)
-        {
-            udst[i]     = src[4*i+0];
-            ydst[2*i+0] = src[4*i+1];
-            vdst[i]     = src[4*i+2];
-            ydst[2*i+1] = src[4*i+3];
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i=0; i<chromWidth; i++)
-        {
-            ydst[2*i+0] = src[4*i+1];
-            ydst[2*i+1] = src[4*i+3];
-        }
-#endif
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-#if HAVE_MMX
-__asm__ volatile(   EMMS"       \n\t"
-                SFENCE"     \n\t"
-                :::"memory");
-#endif
-}
-
-/**
- * Height should be a multiple of 2 and width should be a multiple of 2.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- * Chrominance data is only taken from every second line,
- * others are ignored in the C version.
- * FIXME: Write HQ version.
- */
-static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                       long width, long height,
-                                       long lumStride, long chromStride, long srcStride)
-{
-    long y;
-    const long chromWidth= width>>1;
-#if HAVE_MMX
-    for (y=0; y<height-2; y+=2)
-    {
-        long i;
-        for (i=0; i<2; i++)
-        {
-            __asm__ volatile(
-            "mov                        %2, %%"REG_a"   \n\t"
-            "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
-            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-            "pxor                    %%mm7, %%mm7       \n\t"
-            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-            ASMALIGN(4)
-            "1:                                         \n\t"
-            PREFETCH"    64(%0, %%"REG_d")              \n\t"
-            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm0       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "pmaddwd                 %%mm6, %%mm0       \n\t"
-            "pmaddwd                 %%mm6, %%mm1       \n\t"
-            "pmaddwd                 %%mm6, %%mm2       \n\t"
-            "pmaddwd                 %%mm6, %%mm3       \n\t"
-#ifndef FAST_BGR2YV12
-            "psrad                      $8, %%mm0       \n\t"
-            "psrad                      $8, %%mm1       \n\t"
-            "psrad                      $8, %%mm2       \n\t"
-            "psrad                      $8, %%mm3       \n\t"
-#endif
-            "packssdw                %%mm1, %%mm0       \n\t"
-            "packssdw                %%mm3, %%mm2       \n\t"
-            "pmaddwd                 %%mm5, %%mm0       \n\t"
-            "pmaddwd                 %%mm5, %%mm2       \n\t"
-            "packssdw                %%mm2, %%mm0       \n\t"
-            "psraw                      $7, %%mm0       \n\t"
-
-            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "pmaddwd                 %%mm6, %%mm4       \n\t"
-            "pmaddwd                 %%mm6, %%mm1       \n\t"
-            "pmaddwd                 %%mm6, %%mm2       \n\t"
-            "pmaddwd                 %%mm6, %%mm3       \n\t"
-#ifndef FAST_BGR2YV12
-            "psrad                      $8, %%mm4       \n\t"
-            "psrad                      $8, %%mm1       \n\t"
-            "psrad                      $8, %%mm2       \n\t"
-            "psrad                      $8, %%mm3       \n\t"
-#endif
-            "packssdw                %%mm1, %%mm4       \n\t"
-            "packssdw                %%mm3, %%mm2       \n\t"
-            "pmaddwd                 %%mm5, %%mm4       \n\t"
-            "pmaddwd                 %%mm5, %%mm2       \n\t"
-            "add                       $24, %%"REG_d"   \n\t"
-            "packssdw                %%mm2, %%mm4       \n\t"
-            "psraw                      $7, %%mm4       \n\t"
-
-            "packuswb                %%mm4, %%mm0       \n\t"
-            "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
-
-            MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
-            "add                        $8,      %%"REG_a"  \n\t"
-            " js                        1b                  \n\t"
-            : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
-            : "%"REG_a, "%"REG_d
-            );
-            ydst += lumStride;
-            src  += srcStride;
-        }
-        src -= srcStride*2;
-        __asm__ volatile(
-        "mov                        %4, %%"REG_a"   \n\t"
-        "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-        "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
-        "pxor                    %%mm7, %%mm7       \n\t"
-        "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-        "add                 %%"REG_d", %%"REG_d"   \n\t"
-        ASMALIGN(4)
-        "1:                                         \n\t"
-        PREFETCH"    64(%0, %%"REG_d")              \n\t"
-        PREFETCH"    64(%1, %%"REG_d")              \n\t"
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-        "movq          (%0, %%"REG_d"), %%mm0       \n\t"
-        "movq          (%1, %%"REG_d"), %%mm1       \n\t"
-        "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
-        "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
-        PAVGB"                   %%mm1, %%mm0       \n\t"
-        PAVGB"                   %%mm3, %%mm2       \n\t"
-        "movq                    %%mm0, %%mm1       \n\t"
-        "movq                    %%mm2, %%mm3       \n\t"
-        "psrlq                     $24, %%mm0       \n\t"
-        "psrlq                     $24, %%mm2       \n\t"
-        PAVGB"                   %%mm1, %%mm0       \n\t"
-        PAVGB"                   %%mm3, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm0       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-        "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-        "movd          (%1, %%"REG_d"), %%mm1       \n\t"
-        "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
-        "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
-        "punpcklbw               %%mm7, %%mm0       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "paddw                   %%mm1, %%mm0       \n\t"
-        "paddw                   %%mm3, %%mm2       \n\t"
-        "paddw                   %%mm2, %%mm0       \n\t"
-        "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
-        "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
-        "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
-        "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
-        "punpcklbw               %%mm7, %%mm4       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "paddw                   %%mm1, %%mm4       \n\t"
-        "paddw                   %%mm3, %%mm2       \n\t"
-        "paddw                   %%mm4, %%mm2       \n\t"
-        "psrlw                      $2, %%mm0       \n\t"
-        "psrlw                      $2, %%mm2       \n\t"
-#endif
-        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
-        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
-
-        "pmaddwd                 %%mm0, %%mm1       \n\t"
-        "pmaddwd                 %%mm2, %%mm3       \n\t"
-        "pmaddwd                 %%mm6, %%mm0       \n\t"
-        "pmaddwd                 %%mm6, %%mm2       \n\t"
-#ifndef FAST_BGR2YV12
-        "psrad                      $8, %%mm0       \n\t"
-        "psrad                      $8, %%mm1       \n\t"
-        "psrad                      $8, %%mm2       \n\t"
-        "psrad                      $8, %%mm3       \n\t"
-#endif
-        "packssdw                %%mm2, %%mm0       \n\t"
-        "packssdw                %%mm3, %%mm1       \n\t"
-        "pmaddwd                 %%mm5, %%mm0       \n\t"
-        "pmaddwd                 %%mm5, %%mm1       \n\t"
-        "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
-        "psraw                      $7, %%mm0       \n\t"
-
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-        "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
-        "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
-        "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
-        "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
-        PAVGB"                   %%mm1, %%mm4       \n\t"
-        PAVGB"                   %%mm3, %%mm2       \n\t"
-        "movq                    %%mm4, %%mm1       \n\t"
-        "movq                    %%mm2, %%mm3       \n\t"
-        "psrlq                     $24, %%mm4       \n\t"
-        "psrlq                     $24, %%mm2       \n\t"
-        PAVGB"                   %%mm1, %%mm4       \n\t"
-        PAVGB"                   %%mm3, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm4       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-        "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-        "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
-        "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
-        "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
-        "punpcklbw               %%mm7, %%mm4       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "paddw                   %%mm1, %%mm4       \n\t"
-        "paddw                   %%mm3, %%mm2       \n\t"
-        "paddw                   %%mm2, %%mm4       \n\t"
-        "movd        18(%0, %%"REG_d"), %%mm5       \n\t"
-        "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
-        "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
-        "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
-        "punpcklbw               %%mm7, %%mm5       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "paddw                   %%mm1, %%mm5       \n\t"
-        "paddw                   %%mm3, %%mm2       \n\t"
-        "paddw                   %%mm5, %%mm2       \n\t"
-        "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-        "psrlw                      $2, %%mm4       \n\t"
-        "psrlw                      $2, %%mm2       \n\t"
-#endif
-        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
-        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
-
-        "pmaddwd                 %%mm4, %%mm1       \n\t"
-        "pmaddwd                 %%mm2, %%mm3       \n\t"
-        "pmaddwd                 %%mm6, %%mm4       \n\t"
-        "pmaddwd                 %%mm6, %%mm2       \n\t"
-#ifndef FAST_BGR2YV12
-        "psrad                      $8, %%mm4       \n\t"
-        "psrad                      $8, %%mm1       \n\t"
-        "psrad                      $8, %%mm2       \n\t"
-        "psrad                      $8, %%mm3       \n\t"
-#endif
-        "packssdw                %%mm2, %%mm4       \n\t"
-        "packssdw                %%mm3, %%mm1       \n\t"
-        "pmaddwd                 %%mm5, %%mm4       \n\t"
-        "pmaddwd                 %%mm5, %%mm1       \n\t"
-        "add                       $24, %%"REG_d"   \n\t"
-        "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
-        "psraw                      $7, %%mm4       \n\t"
-
-        "movq                    %%mm0, %%mm1           \n\t"
-        "punpckldq               %%mm4, %%mm0           \n\t"
-        "punpckhdq               %%mm4, %%mm1           \n\t"
-        "packsswb                %%mm1, %%mm0           \n\t"
-        "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
-        "movd                    %%mm0, (%2, %%"REG_a") \n\t"
-        "punpckhdq               %%mm0, %%mm0           \n\t"
-        "movd                    %%mm0, (%3, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"       \n\t"
-        " js                        1b                  \n\t"
-        : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
-        : "%"REG_a, "%"REG_d
-        );
-
-        udst += chromStride;
-        vdst += chromStride;
-        src  += srcStride*2;
-    }
-
-    __asm__ volatile(   EMMS"       \n\t"
-                    SFENCE"     \n\t"
-                    :::"memory");
-#else
-    y=0;
-#endif
-    for (; y<height; y+=2)
-    {
-        long i;
-        for (i=0; i<chromWidth; i++)
-        {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
-
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
-            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
-
-            udst[i]     = U;
-            vdst[i]     = V;
-            ydst[2*i]   = Y;
-
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
-
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
-        }
-        ydst += lumStride;
-        src  += srcStride;
-
-        for (i=0; i<chromWidth; i++)
-        {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
-
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-
-            ydst[2*i]     = Y;
-
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
-
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
-        }
-        udst += chromStride;
-        vdst += chromStride;
-        ydst += lumStride;
-        src  += srcStride;
-    }
-}
-
-static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
-                             long width, long height, long src1Stride,
-                             long src2Stride, long dstStride){
-    long h;
-
-    for (h=0; h < height; h++)
-    {
-        long w;
-
-#if HAVE_MMX
-#if HAVE_SSE2
-        __asm__(
-        "xor              %%"REG_a", %%"REG_a"  \n\t"
-        "1:                                     \n\t"
-        PREFETCH" 64(%1, %%"REG_a")             \n\t"
-        PREFETCH" 64(%2, %%"REG_a")             \n\t"
-        "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
-        "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
-        "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
-        "punpcklbw           %%xmm2, %%xmm0     \n\t"
-        "punpckhbw           %%xmm2, %%xmm1     \n\t"
-        "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
-        "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
-        "add                    $16, %%"REG_a"  \n\t"
-        "cmp                     %3, %%"REG_a"  \n\t"
-        " jb                     1b             \n\t"
-        ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
-        : "memory", "%"REG_a""
-        );
-#else
-        __asm__(
-        "xor %%"REG_a", %%"REG_a"               \n\t"
-        "1:                                     \n\t"
-        PREFETCH" 64(%1, %%"REG_a")             \n\t"
-        PREFETCH" 64(%2, %%"REG_a")             \n\t"
-        "movq       (%1, %%"REG_a"), %%mm0      \n\t"
-        "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
-        "movq                 %%mm0, %%mm1      \n\t"
-        "movq                 %%mm2, %%mm3      \n\t"
-        "movq       (%2, %%"REG_a"), %%mm4      \n\t"
-        "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
-        "punpcklbw            %%mm4, %%mm0      \n\t"
-        "punpckhbw            %%mm4, %%mm1      \n\t"
-        "punpcklbw            %%mm5, %%mm2      \n\t"
-        "punpckhbw            %%mm5, %%mm3      \n\t"
-        MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
-        MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
-        MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
-        MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
-        "add                    $16, %%"REG_a"  \n\t"
-        "cmp                     %3, %%"REG_a"  \n\t"
-        " jb                     1b             \n\t"
-        ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
-        : "memory", "%"REG_a
-        );
-#endif
-        for (w= (width&(~15)); w < width; w++)
-        {
-            dest[2*w+0] = src1[w];
-            dest[2*w+1] = src2[w];
-        }
-#else
-        for (w=0; w < width; w++)
-        {
-            dest[2*w+0] = src1[w];
-            dest[2*w+1] = src2[w];
-        }
-#endif
-        dest += dstStride;
-                src1 += src1Stride;
-                src2 += src2Stride;
-    }
-#if HAVE_MMX
-    __asm__(
-        EMMS"       \n\t"
-        SFENCE"     \n\t"
-        ::: "memory"
-        );
-#endif
-}
-
-static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
-                                       uint8_t *dst1, uint8_t *dst2,
-                                       long width, long height,
-                                       long srcStride1, long srcStride2,
-                                       long dstStride1, long dstStride2)
-{
-    long y,x,w,h;
-    w=width/2; h=height/2;
-#if HAVE_MMX
-    __asm__ volatile(
-    PREFETCH" %0    \n\t"
-    PREFETCH" %1    \n\t"
-    ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
-#endif
-    for (y=0;y<h;y++){
-    const uint8_t* s1=src1+srcStride1*(y>>1);
-    uint8_t* d=dst1+dstStride1*y;
-    x=0;
-#if HAVE_MMX
-    for (;x<w-31;x+=32)
-    {
-        __asm__ volatile(
-        PREFETCH"   32%1        \n\t"
-        "movq         %1, %%mm0 \n\t"
-        "movq        8%1, %%mm2 \n\t"
-        "movq       16%1, %%mm4 \n\t"
-        "movq       24%1, %%mm6 \n\t"
-        "movq      %%mm0, %%mm1 \n\t"
-        "movq      %%mm2, %%mm3 \n\t"
-        "movq      %%mm4, %%mm5 \n\t"
-        "movq      %%mm6, %%mm7 \n\t"
-        "punpcklbw %%mm0, %%mm0 \n\t"
-        "punpckhbw %%mm1, %%mm1 \n\t"
-        "punpcklbw %%mm2, %%mm2 \n\t"
-        "punpckhbw %%mm3, %%mm3 \n\t"
-        "punpcklbw %%mm4, %%mm4 \n\t"
-        "punpckhbw %%mm5, %%mm5 \n\t"
-        "punpcklbw %%mm6, %%mm6 \n\t"
-        "punpckhbw %%mm7, %%mm7 \n\t"
-        MOVNTQ"    %%mm0,   %0  \n\t"
-        MOVNTQ"    %%mm1,  8%0  \n\t"
-        MOVNTQ"    %%mm2, 16%0  \n\t"
-        MOVNTQ"    %%mm3, 24%0  \n\t"
-        MOVNTQ"    %%mm4, 32%0  \n\t"
-        MOVNTQ"    %%mm5, 40%0  \n\t"
-        MOVNTQ"    %%mm6, 48%0  \n\t"
-        MOVNTQ"    %%mm7, 56%0"
-        :"=m"(d[2*x])
-        :"m"(s1[x])
-        :"memory");
-    }
-#endif
-    for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
-    }
-    for (y=0;y<h;y++){
-    const uint8_t* s2=src2+srcStride2*(y>>1);
-    uint8_t* d=dst2+dstStride2*y;
-    x=0;
-#if HAVE_MMX
-    for (;x<w-31;x+=32)
-    {
-        __asm__ volatile(
-        PREFETCH"   32%1        \n\t"
-        "movq         %1, %%mm0 \n\t"
-        "movq        8%1, %%mm2 \n\t"
-        "movq       16%1, %%mm4 \n\t"
-        "movq       24%1, %%mm6 \n\t"
-        "movq      %%mm0, %%mm1 \n\t"
-        "movq      %%mm2, %%mm3 \n\t"
-        "movq      %%mm4, %%mm5 \n\t"
-        "movq      %%mm6, %%mm7 \n\t"
-        "punpcklbw %%mm0, %%mm0 \n\t"
-        "punpckhbw %%mm1, %%mm1 \n\t"
-        "punpcklbw %%mm2, %%mm2 \n\t"
-        "punpckhbw %%mm3, %%mm3 \n\t"
-        "punpcklbw %%mm4, %%mm4 \n\t"
-        "punpckhbw %%mm5, %%mm5 \n\t"
-        "punpcklbw %%mm6, %%mm6 \n\t"
-        "punpckhbw %%mm7, %%mm7 \n\t"
-        MOVNTQ"    %%mm0,   %0  \n\t"
-        MOVNTQ"    %%mm1,  8%0  \n\t"
-        MOVNTQ"    %%mm2, 16%0  \n\t"
-        MOVNTQ"    %%mm3, 24%0  \n\t"
-        MOVNTQ"    %%mm4, 32%0  \n\t"
-        MOVNTQ"    %%mm5, 40%0  \n\t"
-        MOVNTQ"    %%mm6, 48%0  \n\t"
-        MOVNTQ"    %%mm7, 56%0"
-        :"=m"(d[2*x])
-        :"m"(s2[x])
-        :"memory");
-    }
-#endif
-    for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
-    }
-#if HAVE_MMX
-    __asm__(
-        EMMS"       \n\t"
-        SFENCE"     \n\t"
-        ::: "memory"
-        );
-#endif
-}
-
-static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                                        uint8_t *dst,
-                                        long width, long height,
-                                        long srcStride1, long srcStride2,
-                                        long srcStride3, long dstStride)
-{
-    long y,x,w,h;
-    w=width/2; h=height;
-    for (y=0;y<h;y++){
-    const uint8_t* yp=src1+srcStride1*y;
-    const uint8_t* up=src2+srcStride2*(y>>2);
-    const uint8_t* vp=src3+srcStride3*(y>>2);
-    uint8_t* d=dst+dstStride*y;
-    x=0;
-#if HAVE_MMX
-    for (;x<w-7;x+=8)
-    {
-        __asm__ volatile(
-        PREFETCH"   32(%1, %0)          \n\t"
-        PREFETCH"   32(%2, %0)          \n\t"
-        PREFETCH"   32(%3, %0)          \n\t"
-        "movq      (%1, %0, 4), %%mm0   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-        "movq         (%2, %0), %%mm1   \n\t" /* U0U1U2U3U4U5U6U7 */
-        "movq         (%3, %0), %%mm2   \n\t" /* V0V1V2V3V4V5V6V7 */
-        "movq            %%mm0, %%mm3   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-        "movq            %%mm1, %%mm4   \n\t" /* U0U1U2U3U4U5U6U7 */
-        "movq            %%mm2, %%mm5   \n\t" /* V0V1V2V3V4V5V6V7 */
-        "punpcklbw       %%mm1, %%mm1   \n\t" /* U0U0 U1U1 U2U2 U3U3 */
-        "punpcklbw       %%mm2, %%mm2   \n\t" /* V0V0 V1V1 V2V2 V3V3 */
-        "punpckhbw       %%mm4, %%mm4   \n\t" /* U4U4 U5U5 U6U6 U7U7 */
-        "punpckhbw       %%mm5, %%mm5   \n\t" /* V4V4 V5V5 V6V6 V7V7 */
-
-        "movq            %%mm1, %%mm6   \n\t"
-        "punpcklbw       %%mm2, %%mm1   \n\t" /* U0V0 U0V0 U1V1 U1V1*/
-        "punpcklbw       %%mm1, %%mm0   \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
-        "punpckhbw       %%mm1, %%mm3   \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
-        MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t"
-        MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t"
-
-        "punpckhbw       %%mm2, %%mm6   \n\t" /* U2V2 U2V2 U3V3 U3V3*/
-        "movq     8(%1, %0, 4), %%mm0   \n\t"
-        "movq            %%mm0, %%mm3   \n\t"
-        "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U2 Y V2 Y U2 Y V2*/
-        "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U3 Y V3 Y U3 Y V3*/
-        MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t"
-        MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t"
-
-        "movq            %%mm4, %%mm6   \n\t"
-        "movq    16(%1, %0, 4), %%mm0   \n\t"
-        "movq            %%mm0, %%mm3   \n\t"
-        "punpcklbw       %%mm5, %%mm4   \n\t"
-        "punpcklbw       %%mm4, %%mm0   \n\t" /* Y U4 Y V4 Y U4 Y V4*/
-        "punpckhbw       %%mm4, %%mm3   \n\t" /* Y U5 Y V5 Y U5 Y V5*/
-        MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t"
-        MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t"
-
-        "punpckhbw       %%mm5, %%mm6   \n\t"
-        "movq    24(%1, %0, 4), %%mm0   \n\t"
-        "movq            %%mm0, %%mm3   \n\t"
-        "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U6 Y V6 Y U6 Y V6*/
-        "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U7 Y V7 Y U7 Y V7*/
-        MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t"
-        MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t"
-
-        : "+r" (x)
-        : "r"(yp), "r" (up), "r"(vp), "r"(d)
-        :"memory");
-    }
-#endif
-    for (; x<w; x++)
-    {
-        const long x2 = x<<2;
-        d[8*x+0] = yp[x2];
-        d[8*x+1] = up[x];
-        d[8*x+2] = yp[x2+1];
-        d[8*x+3] = vp[x];
-        d[8*x+4] = yp[x2+2];
-        d[8*x+5] = up[x];
-        d[8*x+6] = yp[x2+3];
-        d[8*x+7] = vp[x];
-    }
-    }
-#if HAVE_MMX
-    __asm__(
-        EMMS"       \n\t"
-        SFENCE"     \n\t"
-        ::: "memory"
-        );
-#endif
-}
-
-static inline void RENAME(rgb2rgb_init)(void){
-    rgb15to16       = RENAME(rgb15to16);
-    rgb15tobgr24    = RENAME(rgb15tobgr24);
-    rgb15to32       = RENAME(rgb15to32);
-    rgb16tobgr24    = RENAME(rgb16tobgr24);
-    rgb16to32       = RENAME(rgb16to32);
-    rgb16to15       = RENAME(rgb16to15);
-    rgb24tobgr16    = RENAME(rgb24tobgr16);
-    rgb24tobgr15    = RENAME(rgb24tobgr15);
-    rgb24tobgr32    = RENAME(rgb24tobgr32);
-    rgb32to16       = RENAME(rgb32to16);
-    rgb32to15       = RENAME(rgb32to15);
-    rgb32tobgr24    = RENAME(rgb32tobgr24);
-    rgb24to15       = RENAME(rgb24to15);
-    rgb24to16       = RENAME(rgb24to16);
-    rgb24tobgr24    = RENAME(rgb24tobgr24);
-    rgb32tobgr32    = RENAME(rgb32tobgr32);
-    rgb32tobgr16    = RENAME(rgb32tobgr16);
-    rgb32tobgr15    = RENAME(rgb32tobgr15);
-    yv12toyuy2      = RENAME(yv12toyuy2);
-    yv12touyvy      = RENAME(yv12touyvy);
-    yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
-    yuv422ptouyvy   = RENAME(yuv422ptouyvy);
-    yuy2toyv12      = RENAME(yuy2toyv12);
-//    uyvytoyv12      = RENAME(uyvytoyv12);
-//    yvu9toyv12      = RENAME(yvu9toyv12);
-    planar2x        = RENAME(planar2x);
-    rgb24toyv12     = RENAME(rgb24toyv12);
-    interleaveBytes = RENAME(interleaveBytes);
-    vu9_to_vu12     = RENAME(vu9_to_vu12);
-    yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
-}
diff --git a/libswscale/swscale-example.c b/libswscale/swscale-example.c
deleted file mode 100644
index 87b9ba027d..0000000000
--- a/libswscale/swscale-example.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <stdarg.h>
-
-#undef HAVE_AV_CONFIG_H
-#include "libavutil/avutil.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){
-    int x,y;
-    uint64_t ssd=0;
-
-//printf("%d %d\n", w, h);
-
-    for (y=0; y<h; y++){
-        for (x=0; x<w; x++){
-            int d= src1[x + y*stride1] - src2[x + y*stride2];
-            ssd+= d*d;
-//printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 );
-        }
-//printf("\n");
-    }
-    return ssd;
-}
-
-// test by ref -> src -> dst -> out & compare out against ref
-// ref & out are YV12
-static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat,
-                  int srcW, int srcH, int dstW, int dstH, int flags){
-    uint8_t *src[3];
-    uint8_t *dst[3];
-    uint8_t *out[3];
-    int srcStride[3], dstStride[3];
-    int i;
-    uint64_t ssdY, ssdU, ssdV;
-    struct SwsContext *srcContext, *dstContext, *outContext;
-    int res;
-
-    res = 0;
-    for (i=0; i<3; i++){
-        // avoid stride % bpp != 0
-        if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
-            srcStride[i]= srcW*3;
-        else
-            srcStride[i]= srcW*4;
-
-        if (dstFormat==PIX_FMT_RGB24 || dstFormat==PIX_FMT_BGR24)
-            dstStride[i]= dstW*3;
-        else
-            dstStride[i]= dstW*4;
-
-        src[i]= (uint8_t*) malloc(srcStride[i]*srcH);
-        dst[i]= (uint8_t*) malloc(dstStride[i]*dstH);
-        out[i]= (uint8_t*) malloc(refStride[i]*h);
-        if (!src[i] || !dst[i] || !out[i]) {
-            perror("Malloc");
-            res = -1;
-
-            goto end;
-        }
-    }
-
-    dstContext = outContext = NULL;
-    srcContext= sws_getContext(w, h, PIX_FMT_YUV420P, srcW, srcH, srcFormat, flags, NULL, NULL, NULL);
-    if (!srcContext) {
-        fprintf(stderr, "Failed to get %s ---> %s\n",
-                sws_format_name(PIX_FMT_YUV420P),
-                sws_format_name(srcFormat));
-        res = -1;
-
-        goto end;
-    }
-    dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL);
-    if (!dstContext) {
-        fprintf(stderr, "Failed to get %s ---> %s\n",
-                sws_format_name(srcFormat),
-                sws_format_name(dstFormat));
-        res = -1;
-
-        goto end;
-    }
-    outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUV420P, flags, NULL, NULL, NULL);
-    if (!outContext) {
-        fprintf(stderr, "Failed to get %s ---> %s\n",
-                sws_format_name(dstFormat),
-                sws_format_name(PIX_FMT_YUV420P));
-        res = -1;
-
-        goto end;
-    }
-//    printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
-//        (int)src[0], (int)src[1], (int)src[2]);
-
-    sws_scale(srcContext, ref, refStride, 0, h   , src, srcStride);
-    sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
-    sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride);
-
-    ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
-    ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
-    ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
-
-    if (srcFormat == PIX_FMT_GRAY8 || dstFormat==PIX_FMT_GRAY8) ssdU=ssdV=0; //FIXME check that output is really gray
-
-    ssdY/= w*h;
-    ssdU/= w*h/4;
-    ssdV/= w*h/4;
-
-    printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n",
-           sws_format_name(srcFormat), srcW, srcH,
-           sws_format_name(dstFormat), dstW, dstH,
-           flags, ssdY, ssdU, ssdV);
-    fflush(stdout);
-
-    end:
-
-    sws_freeContext(srcContext);
-    sws_freeContext(dstContext);
-    sws_freeContext(outContext);
-
-    for (i=0; i<3; i++){
-        free(src[i]);
-        free(dst[i]);
-        free(out[i]);
-    }
-
-    return res;
-}
-
-static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
-    enum PixelFormat srcFormat, dstFormat;
-    int srcW, srcH, dstW, dstH;
-    int flags;
-
-    for (srcFormat = 0; srcFormat < PIX_FMT_NB; srcFormat++) {
-        for (dstFormat = 0; dstFormat < PIX_FMT_NB; dstFormat++) {
-            printf("%s -> %s\n",
-                   sws_format_name(srcFormat),
-                   sws_format_name(dstFormat));
-            fflush(stdout);
-
-            srcW= w;
-            srcH= h;
-            for (dstW=w - w/3; dstW<= 4*w/3; dstW+= w/3){
-                for (dstH=h - h/3; dstH<= 4*h/3; dstH+= h/3){
-                    for (flags=1; flags<33; flags*=2) {
-                        int res;
-
-                        res = doTest(src, stride, w, h, srcFormat, dstFormat,
-                                     srcW, srcH, dstW, dstH, flags);
-                        if (res < 0) {
-                            dstW = 4 * w / 3;
-                            dstH = 4 * h / 3;
-                            flags = 33;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-#define W 96
-#define H 96
-
-int main(int argc, char **argv){
-    uint8_t *rgb_data = malloc (W*H*4);
-    uint8_t *rgb_src[3]= {rgb_data, NULL, NULL};
-    int rgb_stride[3]={4*W, 0, 0};
-    uint8_t *data = malloc (3*W*H);
-    uint8_t *src[3]= {data, data+W*H, data+W*H*2};
-    int stride[3]={W, W, W};
-    int x, y;
-    struct SwsContext *sws;
-
-    sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUV420P, 2, NULL, NULL, NULL);
-
-    for (y=0; y<H; y++){
-        for (x=0; x<W*4; x++){
-            rgb_data[ x + y*4*W]= random();
-        }
-    }
-    sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
-
-    selfTest(src, stride, W, H);
-
-    return 123;
-}
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
deleted file mode 100644
index 7c335f1680..0000000000
--- a/libswscale/swscale.c
+++ /dev/null
@@ -1,3198 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * the C code (not assembly, mmx, ...) of this file can be used
- * under the LGPL license too
- */
-
-/*
-  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
-  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
-  {BGR,RGB}{1,4,8,15,16} support dithering
-
-  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
-  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
-  x -> x
-  YUV9 -> YV12
-  YUV9/YV12 -> Y800
-  Y800 -> YUV9/YV12
-  BGR24 -> BGR32 & RGB24 -> RGB32
-  BGR32 -> BGR24 & RGB32 -> RGB24
-  BGR15 -> BGR16
-*/
-
-/*
-tested special converters (most are tested actually, but I did not write it down ...)
- YV12 -> BGR16
- YV12 -> YV12
- BGR15 -> BGR16
- BGR16 -> BGR16
- YVU9 -> YV12
-
-untested special converters
-  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
-  YV12/I420 -> YV12/I420
-  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
-  BGR24 -> BGR32 & RGB24 -> RGB32
-  BGR32 -> BGR24 & RGB32 -> RGB24
-  BGR24 -> YV12
-*/
-
-#define _SVID_SOURCE //needed for MAP_ANONYMOUS
-#include <inttypes.h>
-#include <string.h>
-#include <math.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "config.h"
-#include <assert.h>
-#if HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-#endif
-#include "swscale.h"
-#include "swscale_internal.h"
-#include "rgb2rgb.h"
-#include "libavutil/x86_cpu.h"
-#include "libavutil/bswap.h"
-
-unsigned swscale_version(void)
-{
-    return LIBSWSCALE_VERSION_INT;
-}
-
-#undef MOVNTQ
-#undef PAVGB
-
-//#undef HAVE_MMX2
-//#define HAVE_AMD3DNOW
-//#undef HAVE_MMX
-//#undef ARCH_X86
-//#define WORDS_BIGENDIAN
-#define DITHER1XBPP
-
-#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
-
-#define RET 0xC3 //near return opcode for x86
-
-#ifdef M_PI
-#define PI M_PI
-#else
-#define PI 3.14159265358979323846
-#endif
-
-#define isSupportedIn(x)    (       \
-           (x)==PIX_FMT_YUV420P     \
-        || (x)==PIX_FMT_YUVA420P    \
-        || (x)==PIX_FMT_YUYV422     \
-        || (x)==PIX_FMT_UYVY422     \
-        || (x)==PIX_FMT_RGB32       \
-        || (x)==PIX_FMT_RGB32_1     \
-        || (x)==PIX_FMT_BGR24       \
-        || (x)==PIX_FMT_BGR565      \
-        || (x)==PIX_FMT_BGR555      \
-        || (x)==PIX_FMT_BGR32       \
-        || (x)==PIX_FMT_BGR32_1     \
-        || (x)==PIX_FMT_RGB24       \
-        || (x)==PIX_FMT_RGB565      \
-        || (x)==PIX_FMT_RGB555      \
-        || (x)==PIX_FMT_GRAY8       \
-        || (x)==PIX_FMT_YUV410P     \
-        || (x)==PIX_FMT_YUV440P     \
-        || (x)==PIX_FMT_GRAY16BE    \
-        || (x)==PIX_FMT_GRAY16LE    \
-        || (x)==PIX_FMT_YUV444P     \
-        || (x)==PIX_FMT_YUV422P     \
-        || (x)==PIX_FMT_YUV411P     \
-        || (x)==PIX_FMT_PAL8        \
-        || (x)==PIX_FMT_BGR8        \
-        || (x)==PIX_FMT_RGB8        \
-        || (x)==PIX_FMT_BGR4_BYTE   \
-        || (x)==PIX_FMT_RGB4_BYTE   \
-        || (x)==PIX_FMT_YUV440P     \
-        || (x)==PIX_FMT_MONOWHITE   \
-        || (x)==PIX_FMT_MONOBLACK   \
-    )
-#define isSupportedOut(x)   (       \
-           (x)==PIX_FMT_YUV420P     \
-        || (x)==PIX_FMT_YUYV422     \
-        || (x)==PIX_FMT_UYVY422     \
-        || (x)==PIX_FMT_YUV444P     \
-        || (x)==PIX_FMT_YUV422P     \
-        || (x)==PIX_FMT_YUV411P     \
-        || isRGB(x)                 \
-        || isBGR(x)                 \
-        || (x)==PIX_FMT_NV12        \
-        || (x)==PIX_FMT_NV21        \
-        || (x)==PIX_FMT_GRAY16BE    \
-        || (x)==PIX_FMT_GRAY16LE    \
-        || (x)==PIX_FMT_GRAY8       \
-        || (x)==PIX_FMT_YUV410P     \
-        || (x)==PIX_FMT_YUV440P     \
-    )
-#define isPacked(x)         (       \
-           (x)==PIX_FMT_PAL8        \
-        || (x)==PIX_FMT_YUYV422     \
-        || (x)==PIX_FMT_UYVY422     \
-        || isRGB(x)                 \
-        || isBGR(x)                 \
-    )
-#define usePal(x)           (       \
-           (x)==PIX_FMT_PAL8        \
-        || (x)==PIX_FMT_BGR4_BYTE   \
-        || (x)==PIX_FMT_RGB4_BYTE   \
-        || (x)==PIX_FMT_BGR8        \
-        || (x)==PIX_FMT_RGB8        \
-    )
-
-#define RGB2YUV_SHIFT 15
-#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
-
-extern const int32_t ff_yuv2rgb_coeffs[8][4];
-
-static const double rgb2yuv_table[8][9]={
-    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
-    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
-    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
-    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
-    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
-    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
-    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
-    {0.701 , 0.087 , 0.212 , -0.384, 0.5  -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
-};
-
-/*
-NOTES
-Special versions: fast Y 1:1 scaling (no interpolation in y direction)
-
-TODO
-more intelligent misalignment avoidance for the horizontal scaler
-write special vertical cubic upscale version
-optimize C code (YV12 / minmax)
-add support for packed pixel YUV input & output
-add support for Y8 output
-optimize BGR24 & BGR32
-add BGR4 output support
-write special BGR->BGR scaler
-*/
-
-#if ARCH_X86 && CONFIG_GPL
-DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
-DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
-DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
-DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
-DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
-DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
-DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
-        0x0103010301030103LL,
-        0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
-        0x0602060206020602LL,
-        0x0004000400040004LL,};
-
-DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
-DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
-DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
-DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
-
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
-
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
-#else
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
-    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
-    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
-};
-
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
-
-#endif /* ARCH_X86 && CONFIG_GPL */
-
-// clipping helper table for C implementations:
-static unsigned char clip_table[768];
-
-static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
-
-static const uint8_t  __attribute__((aligned(8))) dither_2x2_4[2][8]={
-{  1,   3,   1,   3,   1,   3,   1,   3, },
-{  2,   0,   2,   0,   2,   0,   2,   0, },
-};
-
-static const uint8_t  __attribute__((aligned(8))) dither_2x2_8[2][8]={
-{  6,   2,   6,   2,   6,   2,   6,   2, },
-{  0,   4,   0,   4,   0,   4,   0,   4, },
-};
-
-const uint8_t  __attribute__((aligned(8))) dither_8x8_32[8][8]={
-{ 17,   9,  23,  15,  16,   8,  22,  14, },
-{  5,  29,   3,  27,   4,  28,   2,  26, },
-{ 21,  13,  19,  11,  20,  12,  18,  10, },
-{  0,  24,   6,  30,   1,  25,   7,  31, },
-{ 16,   8,  22,  14,  17,   9,  23,  15, },
-{  4,  28,   2,  26,   5,  29,   3,  27, },
-{ 20,  12,  18,  10,  21,  13,  19,  11, },
-{  1,  25,   7,  31,   0,  24,   6,  30, },
-};
-
-#if 0
-const uint8_t  __attribute__((aligned(8))) dither_8x8_64[8][8]={
-{  0,  48,  12,  60,   3,  51,  15,  63, },
-{ 32,  16,  44,  28,  35,  19,  47,  31, },
-{  8,  56,   4,  52,  11,  59,   7,  55, },
-{ 40,  24,  36,  20,  43,  27,  39,  23, },
-{  2,  50,  14,  62,   1,  49,  13,  61, },
-{ 34,  18,  46,  30,  33,  17,  45,  29, },
-{ 10,  58,   6,  54,   9,  57,   5,  53, },
-{ 42,  26,  38,  22,  41,  25,  37,  21, },
-};
-#endif
-
-const uint8_t  __attribute__((aligned(8))) dither_8x8_73[8][8]={
-{  0,  55,  14,  68,   3,  58,  17,  72, },
-{ 37,  18,  50,  32,  40,  22,  54,  35, },
-{  9,  64,   5,  59,  13,  67,   8,  63, },
-{ 46,  27,  41,  23,  49,  31,  44,  26, },
-{  2,  57,  16,  71,   1,  56,  15,  70, },
-{ 39,  21,  52,  34,  38,  19,  51,  33, },
-{ 11,  66,   7,  62,  10,  65,   6,  60, },
-{ 48,  30,  43,  25,  47,  29,  42,  24, },
-};
-
-#if 0
-const uint8_t  __attribute__((aligned(8))) dither_8x8_128[8][8]={
-{ 68,  36,  92,  60,  66,  34,  90,  58, },
-{ 20, 116,  12, 108,  18, 114,  10, 106, },
-{ 84,  52,  76,  44,  82,  50,  74,  42, },
-{  0,  96,  24, 120,   6, 102,  30, 126, },
-{ 64,  32,  88,  56,  70,  38,  94,  62, },
-{ 16, 112,   8, 104,  22, 118,  14, 110, },
-{ 80,  48,  72,  40,  86,  54,  78,  46, },
-{  4, 100,  28, 124,   2,  98,  26, 122, },
-};
-#endif
-
-#if 1
-const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
-{117,  62, 158, 103, 113,  58, 155, 100, },
-{ 34, 199,  21, 186,  31, 196,  17, 182, },
-{144,  89, 131,  76, 141,  86, 127,  72, },
-{  0, 165,  41, 206,  10, 175,  52, 217, },
-{110,  55, 151,  96, 120,  65, 162, 107, },
-{ 28, 193,  14, 179,  38, 203,  24, 189, },
-{138,  83, 124,  69, 148,  93, 134,  79, },
-{  7, 172,  48, 213,   3, 168,  45, 210, },
-};
-#elif 1
-// tries to correct a gamma of 1.5
-const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
-{  0, 143,  18, 200,   2, 156,  25, 215, },
-{ 78,  28, 125,  64,  89,  36, 138,  74, },
-{ 10, 180,   3, 161,  16, 195,   8, 175, },
-{109,  51,  93,  38, 121,  60, 105,  47, },
-{  1, 152,  23, 210,   0, 147,  20, 205, },
-{ 85,  33, 134,  71,  81,  30, 130,  67, },
-{ 14, 190,   6, 171,  12, 185,   5, 166, },
-{117,  57, 101,  44, 113,  54,  97,  41, },
-};
-#elif 1
-// tries to correct a gamma of 2.0
-const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
-{  0, 124,   8, 193,   0, 140,  12, 213, },
-{ 55,  14, 104,  42,  66,  19, 119,  52, },
-{  3, 168,   1, 145,   6, 187,   3, 162, },
-{ 86,  31,  70,  21,  99,  39,  82,  28, },
-{  0, 134,  11, 206,   0, 129,   9, 200, },
-{ 62,  17, 114,  48,  58,  16, 109,  45, },
-{  5, 181,   2, 157,   4, 175,   1, 151, },
-{ 95,  36,  78,  26,  90,  34,  74,  24, },
-};
-#else
-// tries to correct a gamma of 2.5
-const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
-{  0, 107,   3, 187,   0, 125,   6, 212, },
-{ 39,   7,  86,  28,  49,  11, 102,  36, },
-{  1, 158,   0, 131,   3, 180,   1, 151, },
-{ 68,  19,  52,  12,  81,  25,  64,  17, },
-{  0, 119,   5, 203,   0, 113,   4, 195, },
-{ 45,   9,  96,  33,  42,   8,  91,  30, },
-{  2, 172,   1, 144,   2, 165,   0, 137, },
-{ 77,  23,  60,  15,  72,  21,  56,  14, },
-};
-#endif
-
-const char *sws_format_name(enum PixelFormat format)
-{
-    switch (format) {
-        case PIX_FMT_YUV420P:
-            return "yuv420p";
-        case PIX_FMT_YUVA420P:
-            return "yuva420p";
-        case PIX_FMT_YUYV422:
-            return "yuyv422";
-        case PIX_FMT_RGB24:
-            return "rgb24";
-        case PIX_FMT_BGR24:
-            return "bgr24";
-        case PIX_FMT_YUV422P:
-            return "yuv422p";
-        case PIX_FMT_YUV444P:
-            return "yuv444p";
-        case PIX_FMT_RGB32:
-            return "rgb32";
-        case PIX_FMT_YUV410P:
-            return "yuv410p";
-        case PIX_FMT_YUV411P:
-            return "yuv411p";
-        case PIX_FMT_RGB565:
-            return "rgb565";
-        case PIX_FMT_RGB555:
-            return "rgb555";
-        case PIX_FMT_GRAY16BE:
-            return "gray16be";
-        case PIX_FMT_GRAY16LE:
-            return "gray16le";
-        case PIX_FMT_GRAY8:
-            return "gray8";
-        case PIX_FMT_MONOWHITE:
-            return "mono white";
-        case PIX_FMT_MONOBLACK:
-            return "mono black";
-        case PIX_FMT_PAL8:
-            return "Palette";
-        case PIX_FMT_YUVJ420P:
-            return "yuvj420p";
-        case PIX_FMT_YUVJ422P:
-            return "yuvj422p";
-        case PIX_FMT_YUVJ444P:
-            return "yuvj444p";
-        case PIX_FMT_XVMC_MPEG2_MC:
-            return "xvmc_mpeg2_mc";
-        case PIX_FMT_XVMC_MPEG2_IDCT:
-            return "xvmc_mpeg2_idct";
-        case PIX_FMT_UYVY422:
-            return "uyvy422";
-        case PIX_FMT_UYYVYY411:
-            return "uyyvyy411";
-        case PIX_FMT_RGB32_1:
-            return "rgb32x";
-        case PIX_FMT_BGR32_1:
-            return "bgr32x";
-        case PIX_FMT_BGR32:
-            return "bgr32";
-        case PIX_FMT_BGR565:
-            return "bgr565";
-        case PIX_FMT_BGR555:
-            return "bgr555";
-        case PIX_FMT_BGR8:
-            return "bgr8";
-        case PIX_FMT_BGR4:
-            return "bgr4";
-        case PIX_FMT_BGR4_BYTE:
-            return "bgr4 byte";
-        case PIX_FMT_RGB8:
-            return "rgb8";
-        case PIX_FMT_RGB4:
-            return "rgb4";
-        case PIX_FMT_RGB4_BYTE:
-            return "rgb4 byte";
-        case PIX_FMT_NV12:
-            return "nv12";
-        case PIX_FMT_NV21:
-            return "nv21";
-        case PIX_FMT_YUV440P:
-            return "yuv440p";
-        case PIX_FMT_VDPAU_H264:
-            return "vdpau_h264";
-        case PIX_FMT_VDPAU_MPEG1:
-            return "vdpau_mpeg1";
-        case PIX_FMT_VDPAU_MPEG2:
-            return "vdpau_mpeg2";
-        case PIX_FMT_VDPAU_WMV3:
-            return "vdpau_wmv3";
-        case PIX_FMT_VDPAU_VC1:
-            return "vdpau_vc1";
-        default:
-            return "Unknown format";
-    }
-}
-
-static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
-{
-    //FIXME Optimize (just quickly written not optimized..)
-    int i;
-    for (i=0; i<dstW; i++)
-    {
-        int val=1<<18;
-        int j;
-        for (j=0; j<lumFilterSize; j++)
-            val += lumSrc[j][i] * lumFilter[j];
-
-        dest[i]= av_clip_uint8(val>>19);
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++)
-        {
-            int u=1<<18;
-            int v=1<<18;
-            int j;
-            for (j=0; j<chrFilterSize; j++)
-            {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
-            }
-
-            uDest[i]= av_clip_uint8(u>>19);
-            vDest[i]= av_clip_uint8(v>>19);
-        }
-}
-
-static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
-{
-    //FIXME Optimize (just quickly written not optimized..)
-    int i;
-    for (i=0; i<dstW; i++)
-    {
-        int val=1<<18;
-        int j;
-        for (j=0; j<lumFilterSize; j++)
-            val += lumSrc[j][i] * lumFilter[j];
-
-        dest[i]= av_clip_uint8(val>>19);
-    }
-
-    if (!uDest)
-        return;
-
-    if (dstFormat == PIX_FMT_NV12)
-        for (i=0; i<chrDstW; i++)
-        {
-            int u=1<<18;
-            int v=1<<18;
-            int j;
-            for (j=0; j<chrFilterSize; j++)
-            {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
-            }
-
-            uDest[2*i]= av_clip_uint8(u>>19);
-            uDest[2*i+1]= av_clip_uint8(v>>19);
-        }
-    else
-        for (i=0; i<chrDstW; i++)
-        {
-            int u=1<<18;
-            int v=1<<18;
-            int j;
-            for (j=0; j<chrFilterSize; j++)
-            {
-                u += chrSrc[j][i] * chrFilter[j];
-                v += chrSrc[j][i + VOFW] * chrFilter[j];
-            }
-
-            uDest[2*i]= av_clip_uint8(v>>19);
-            uDest[2*i+1]= av_clip_uint8(u>>19);
-        }
-}
-
-#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
-    for (i=0; i<(dstW>>1); i++){\
-        int j;\
-        int Y1 = 1<<18;\
-        int Y2 = 1<<18;\
-        int U  = 1<<18;\
-        int V  = 1<<18;\
-        type av_unused *r, *b, *g;\
-        const int i2= 2*i;\
-        \
-        for (j=0; j<lumFilterSize; j++)\
-        {\
-            Y1 += lumSrc[j][i2] * lumFilter[j];\
-            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
-        }\
-        for (j=0; j<chrFilterSize; j++)\
-        {\
-            U += chrSrc[j][i] * chrFilter[j];\
-            V += chrSrc[j][i+VOFW] * chrFilter[j];\
-        }\
-        Y1>>=19;\
-        Y2>>=19;\
-        U >>=19;\
-        V >>=19;\
-
-#define YSCALE_YUV_2_PACKEDX_C(type) \
-        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
-        if ((Y1|Y2|U|V)&256)\
-        {\
-            if (Y1>255)   Y1=255; \
-            else if (Y1<0)Y1=0;   \
-            if (Y2>255)   Y2=255; \
-            else if (Y2<0)Y2=0;   \
-            if (U>255)    U=255;  \
-            else if (U<0) U=0;    \
-            if (V>255)    V=255;  \
-            else if (V<0) V=0;    \
-        }
-
-#define YSCALE_YUV_2_PACKEDX_FULL_C \
-    for (i=0; i<dstW; i++){\
-        int j;\
-        int Y = 0;\
-        int U = -128<<19;\
-        int V = -128<<19;\
-        int R,G,B;\
-        \
-        for (j=0; j<lumFilterSize; j++){\
-            Y += lumSrc[j][i     ] * lumFilter[j];\
-        }\
-        for (j=0; j<chrFilterSize; j++){\
-            U += chrSrc[j][i     ] * chrFilter[j];\
-            V += chrSrc[j][i+VOFW] * chrFilter[j];\
-        }\
-        Y >>=10;\
-        U >>=10;\
-        V >>=10;\
-
-#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
-    YSCALE_YUV_2_PACKEDX_FULL_C\
-        Y-= c->yuv2rgb_y_offset;\
-        Y*= c->yuv2rgb_y_coeff;\
-        Y+= rnd;\
-        R= Y + V*c->yuv2rgb_v2r_coeff;\
-        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
-        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
-        if ((R|G|B)&(0xC0000000)){\
-            if (R>=(256<<22))   R=(256<<22)-1; \
-            else if (R<0)R=0;   \
-            if (G>=(256<<22))   G=(256<<22)-1; \
-            else if (G<0)G=0;   \
-            if (B>=(256<<22))   B=(256<<22)-1; \
-            else if (B<0)B=0;   \
-        }\
-
-
-#define YSCALE_YUV_2_GRAY16_C \
-    for (i=0; i<(dstW>>1); i++){\
-        int j;\
-        int Y1 = 1<<18;\
-        int Y2 = 1<<18;\
-        int U  = 1<<18;\
-        int V  = 1<<18;\
-        \
-        const int i2= 2*i;\
-        \
-        for (j=0; j<lumFilterSize; j++)\
-        {\
-            Y1 += lumSrc[j][i2] * lumFilter[j];\
-            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
-        }\
-        Y1>>=11;\
-        Y2>>=11;\
-        if ((Y1|Y2|U|V)&65536)\
-        {\
-            if (Y1>65535)   Y1=65535; \
-            else if (Y1<0)Y1=0;   \
-            if (Y2>65535)   Y2=65535; \
-            else if (Y2<0)Y2=0;   \
-        }
-
-#define YSCALE_YUV_2_RGBX_C(type) \
-    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
-    r = (type *)c->table_rV[V];   \
-    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
-    b = (type *)c->table_bU[U];   \
-
-#define YSCALE_YUV_2_PACKED2_C   \
-    for (i=0; i<(dstW>>1); i++){ \
-        const int i2= 2*i;       \
-        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
-        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
-        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
-        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
-
-#define YSCALE_YUV_2_GRAY16_2_C   \
-    for (i=0; i<(dstW>>1); i++){ \
-        const int i2= 2*i;       \
-        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
-        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
-
-#define YSCALE_YUV_2_RGB2_C(type) \
-    YSCALE_YUV_2_PACKED2_C\
-    type *r, *b, *g;\
-    r = (type *)c->table_rV[V];\
-    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
-    b = (type *)c->table_bU[U];\
-
-#define YSCALE_YUV_2_PACKED1_C \
-    for (i=0; i<(dstW>>1); i++){\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]>>7;\
-        int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf1[i     ])>>7;\
-        int V= (uvbuf1[i+VOFW])>>7;\
-
-#define YSCALE_YUV_2_GRAY16_1_C \
-    for (i=0; i<(dstW>>1); i++){\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]<<1;\
-        int Y2= buf0[i2+1]<<1;\
-
-#define YSCALE_YUV_2_RGB1_C(type) \
-    YSCALE_YUV_2_PACKED1_C\
-    type *r, *b, *g;\
-    r = (type *)c->table_rV[V];\
-    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
-    b = (type *)c->table_bU[U];\
-
-#define YSCALE_YUV_2_PACKED1B_C \
-    for (i=0; i<(dstW>>1); i++){\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]>>7;\
-        int Y2= buf0[i2+1]>>7;\
-        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
-        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
-
-#define YSCALE_YUV_2_RGB1B_C(type) \
-    YSCALE_YUV_2_PACKED1B_C\
-    type *r, *b, *g;\
-    r = (type *)c->table_rV[V];\
-    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
-    b = (type *)c->table_bU[U];\
-
-#define YSCALE_YUV_2_MONO2_C \
-    const uint8_t * const d128=dither_8x8_220[y&7];\
-    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
-    for (i=0; i<dstW-7; i+=8){\
-        int acc;\
-        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
-        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
-        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
-        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
-        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
-        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
-        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
-        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
-        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
-        dest++;\
-    }\
-
-
-#define YSCALE_YUV_2_MONOX_C \
-    const uint8_t * const d128=dither_8x8_220[y&7];\
-    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
-    int acc=0;\
-    for (i=0; i<dstW-1; i+=2){\
-        int j;\
-        int Y1=1<<18;\
-        int Y2=1<<18;\
-\
-        for (j=0; j<lumFilterSize; j++)\
-        {\
-            Y1 += lumSrc[j][i] * lumFilter[j];\
-            Y2 += lumSrc[j][i+1] * lumFilter[j];\
-        }\
-        Y1>>=19;\
-        Y2>>=19;\
-        if ((Y1|Y2)&256)\
-        {\
-            if (Y1>255)   Y1=255;\
-            else if (Y1<0)Y1=0;\
-            if (Y2>255)   Y2=255;\
-            else if (Y2<0)Y2=0;\
-        }\
-        acc+= acc + g[Y1+d128[(i+0)&7]];\
-        acc+= acc + g[Y2+d128[(i+1)&7]];\
-        if ((i&7)==6){\
-            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
-            dest++;\
-        }\
-    }
-
-
-#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
-    switch(c->dstFormat)\
-    {\
-    case PIX_FMT_RGB32:\
-    case PIX_FMT_BGR32:\
-    case PIX_FMT_RGB32_1:\
-    case PIX_FMT_BGR32_1:\
-        func(uint32_t)\
-            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
-            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
-        }                \
-        break;\
-    case PIX_FMT_RGB24:\
-        func(uint8_t)\
-            ((uint8_t*)dest)[0]= r[Y1];\
-            ((uint8_t*)dest)[1]= g[Y1];\
-            ((uint8_t*)dest)[2]= b[Y1];\
-            ((uint8_t*)dest)[3]= r[Y2];\
-            ((uint8_t*)dest)[4]= g[Y2];\
-            ((uint8_t*)dest)[5]= b[Y2];\
-            dest+=6;\
-        }\
-        break;\
-    case PIX_FMT_BGR24:\
-        func(uint8_t)\
-            ((uint8_t*)dest)[0]= b[Y1];\
-            ((uint8_t*)dest)[1]= g[Y1];\
-            ((uint8_t*)dest)[2]= r[Y1];\
-            ((uint8_t*)dest)[3]= b[Y2];\
-            ((uint8_t*)dest)[4]= g[Y2];\
-            ((uint8_t*)dest)[5]= r[Y2];\
-            dest+=6;\
-        }\
-        break;\
-    case PIX_FMT_RGB565:\
-    case PIX_FMT_BGR565:\
-        {\
-            const int dr1= dither_2x2_8[y&1    ][0];\
-            const int dg1= dither_2x2_4[y&1    ][0];\
-            const int db1= dither_2x2_8[(y&1)^1][0];\
-            const int dr2= dither_2x2_8[y&1    ][1];\
-            const int dg2= dither_2x2_4[y&1    ][1];\
-            const int db2= dither_2x2_8[(y&1)^1][1];\
-            func(uint16_t)\
-                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
-                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB555:\
-    case PIX_FMT_BGR555:\
-        {\
-            const int dr1= dither_2x2_8[y&1    ][0];\
-            const int dg1= dither_2x2_8[y&1    ][1];\
-            const int db1= dither_2x2_8[(y&1)^1][0];\
-            const int dr2= dither_2x2_8[y&1    ][1];\
-            const int dg2= dither_2x2_8[y&1    ][0];\
-            const int db2= dither_2x2_8[(y&1)^1][1];\
-            func(uint16_t)\
-                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
-                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB8:\
-    case PIX_FMT_BGR8:\
-        {\
-            const uint8_t * const d64= dither_8x8_73[y&7];\
-            const uint8_t * const d32= dither_8x8_32[y&7];\
-            func(uint8_t)\
-                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
-                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB4:\
-    case PIX_FMT_BGR4:\
-        {\
-            const uint8_t * const d64= dither_8x8_73 [y&7];\
-            const uint8_t * const d128=dither_8x8_220[y&7];\
-            func(uint8_t)\
-                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
-                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB4_BYTE:\
-    case PIX_FMT_BGR4_BYTE:\
-        {\
-            const uint8_t * const d64= dither_8x8_73 [y&7];\
-            const uint8_t * const d128=dither_8x8_220[y&7];\
-            func(uint8_t)\
-                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
-                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_MONOBLACK:\
-    case PIX_FMT_MONOWHITE:\
-        {\
-            func_monoblack\
-        }\
-        break;\
-    case PIX_FMT_YUYV422:\
-        func2\
-            ((uint8_t*)dest)[2*i2+0]= Y1;\
-            ((uint8_t*)dest)[2*i2+1]= U;\
-            ((uint8_t*)dest)[2*i2+2]= Y2;\
-            ((uint8_t*)dest)[2*i2+3]= V;\
-        }                \
-        break;\
-    case PIX_FMT_UYVY422:\
-        func2\
-            ((uint8_t*)dest)[2*i2+0]= U;\
-            ((uint8_t*)dest)[2*i2+1]= Y1;\
-            ((uint8_t*)dest)[2*i2+2]= V;\
-            ((uint8_t*)dest)[2*i2+3]= Y2;\
-        }                \
-        break;\
-    case PIX_FMT_GRAY16BE:\
-        func_g16\
-            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
-            ((uint8_t*)dest)[2*i2+1]= Y1;\
-            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
-            ((uint8_t*)dest)[2*i2+3]= Y2;\
-        }                \
-        break;\
-    case PIX_FMT_GRAY16LE:\
-        func_g16\
-            ((uint8_t*)dest)[2*i2+0]= Y1;\
-            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
-            ((uint8_t*)dest)[2*i2+2]= Y2;\
-            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
-        }                \
-        break;\
-    }\
-
-
-static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                  uint8_t *dest, int dstW, int y)
-{
-    int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
-}
-
-static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                    uint8_t *dest, int dstW, int y)
-{
-    int i;
-    int step= fmt_depth(c->dstFormat)/8;
-    int aidx= 3;
-
-    switch(c->dstFormat){
-    case PIX_FMT_ARGB:
-        dest++;
-        aidx= -1;
-    case PIX_FMT_RGB24:
-        aidx--;
-    case PIX_FMT_RGBA:
-        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
-            dest[aidx]= 255;
-            dest[0]= R>>22;
-            dest[1]= G>>22;
-            dest[2]= B>>22;
-            dest+= step;
-        }
-        break;
-    case PIX_FMT_ABGR:
-        dest++;
-        aidx= -1;
-    case PIX_FMT_BGR24:
-        aidx--;
-    case PIX_FMT_BGRA:
-        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
-            dest[aidx]= 255;
-            dest[0]= B>>22;
-            dest[1]= G>>22;
-            dest[2]= R>>22;
-            dest+= step;
-        }
-        break;
-    default:
-        assert(0);
-    }
-}
-
-//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
-//Plain C versions
-#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
-#define COMPILE_C
-#endif
-
-#if ARCH_PPC
-#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
-#undef COMPILE_C
-#define COMPILE_ALTIVEC
-#endif
-#endif //ARCH_PPC
-
-#if ARCH_X86
-
-#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
-#define COMPILE_MMX
-#endif
-
-#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
-#define COMPILE_MMX2
-#endif
-
-#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
-#define COMPILE_3DNOW
-#endif
-#endif //ARCH_X86
-
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef HAVE_ALTIVEC
-#define HAVE_MMX 0
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
-#define HAVE_ALTIVEC 0
-
-#ifdef COMPILE_C
-#define RENAME(a) a ## _C
-#include "swscale_template.c"
-#endif
-
-#ifdef COMPILE_ALTIVEC
-#undef RENAME
-#undef HAVE_ALTIVEC
-#define HAVE_ALTIVEC 1
-#define RENAME(a) a ## _altivec
-#include "swscale_template.c"
-#endif
-
-#if ARCH_X86
-
-//x86 versions
-/*
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define ARCH_X86
-#define RENAME(a) a ## _X86
-#include "swscale_template.c"
-*/
-//MMX versions
-#ifdef COMPILE_MMX
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
-#define RENAME(a) a ## _MMX
-#include "swscale_template.c"
-#endif
-
-//MMX2 versions
-#ifdef COMPILE_MMX2
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 1
-#define HAVE_AMD3DNOW 0
-#define RENAME(a) a ## _MMX2
-#include "swscale_template.c"
-#endif
-
-//3DNOW versions
-#ifdef COMPILE_3DNOW
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNow
-#include "swscale_template.c"
-#endif
-
-#endif //ARCH_X86
-
-// minor note: the HAVE_xyz are messed up after this line so don't use them
-
-static double getSplineCoeff(double a, double b, double c, double d, double dist)
-{
-//    printf("%f %f %f %f %f\n", a,b,c,d,dist);
-    if (dist<=1.0)      return ((d*dist + c)*dist + b)*dist +a;
-    else                return getSplineCoeff(        0.0,
-                                             b+ 2.0*c + 3.0*d,
-                                                    c + 3.0*d,
-                                            -b- 3.0*c - 6.0*d,
-                                            dist-1.0);
-}
-
-static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
-                             int srcW, int dstW, int filterAlign, int one, int flags,
-                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
-{
-    int i;
-    int filterSize;
-    int filter2Size;
-    int minFilterSize;
-    int64_t *filter=NULL;
-    int64_t *filter2=NULL;
-    const int64_t fone= 1LL<<54;
-    int ret= -1;
-#if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
-        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
-#endif
-
-    // NOTE: the +1 is for the MMX scaler which reads over the end
-    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
-
-    if (FFABS(xInc - 0x10000) <10) // unscaled
-    {
-        int i;
-        filterSize= 1;
-        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
-
-        for (i=0; i<dstW; i++)
-        {
-            filter[i*filterSize]= fone;
-            (*filterPos)[i]=i;
-        }
-
-    }
-    else if (flags&SWS_POINT) // lame looking point sampling mode
-    {
-        int i;
-        int xDstInSrc;
-        filterSize= 1;
-        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
-
-        xDstInSrc= xInc/2 - 0x8000;
-        for (i=0; i<dstW; i++)
-        {
-            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
-
-            (*filterPos)[i]= xx;
-            filter[i]= fone;
-            xDstInSrc+= xInc;
-        }
-    }
-    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
-    {
-        int i;
-        int xDstInSrc;
-        if      (flags&SWS_BICUBIC) filterSize= 4;
-        else if (flags&SWS_X      ) filterSize= 4;
-        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
-        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
-
-        xDstInSrc= xInc/2 - 0x8000;
-        for (i=0; i<dstW; i++)
-        {
-            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
-            int j;
-
-            (*filterPos)[i]= xx;
-                //bilinear upscale / linear interpolate / area averaging
-                for (j=0; j<filterSize; j++)
-                {
-                    int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
-                    if (coeff<0) coeff=0;
-                    filter[i*filterSize + j]= coeff;
-                    xx++;
-                }
-            xDstInSrc+= xInc;
-        }
-    }
-    else
-    {
-        int xDstInSrc;
-        int sizeFactor;
-
-        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
-        else if (flags&SWS_X)            sizeFactor=  8;
-        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
-        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
-        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
-        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
-        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
-        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
-        else {
-            sizeFactor= 0; //GCC warning killer
-            assert(0);
-        }
-
-        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
-        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
-
-        if (filterSize > srcW-2) filterSize=srcW-2;
-
-        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
-
-        xDstInSrc= xInc - 0x10000;
-        for (i=0; i<dstW; i++)
-        {
-            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
-            int j;
-            (*filterPos)[i]= xx;
-            for (j=0; j<filterSize; j++)
-            {
-                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
-                double floatd;
-                int64_t coeff;
-
-                if (xInc > 1<<16)
-                    d= d*dstW/srcW;
-                floatd= d * (1.0/(1<<30));
-
-                if (flags & SWS_BICUBIC)
-                {
-                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
-                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
-                    int64_t dd = ( d*d)>>30;
-                    int64_t ddd= (dd*d)>>30;
-
-                    if      (d < 1LL<<30)
-                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
-                    else if (d < 1LL<<31)
-                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
-                    else
-                        coeff=0.0;
-                    coeff *= fone>>(30+24);
-                }
-/*                else if (flags & SWS_X)
-                {
-                    double p= param ? param*0.01 : 0.3;
-                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
-                    coeff*= pow(2.0, - p*d*d);
-                }*/
-                else if (flags & SWS_X)
-                {
-                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
-                    double c;
-
-                    if (floatd<1.0)
-                        c = cos(floatd*PI);
-                    else
-                        c=-1.0;
-                    if (c<0.0)      c= -pow(-c, A);
-                    else            c=  pow( c, A);
-                    coeff= (c*0.5 + 0.5)*fone;
-                }
-                else if (flags & SWS_AREA)
-                {
-                    int64_t d2= d - (1<<29);
-                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
-                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
-                    else coeff=0.0;
-                    coeff *= fone>>(30+16);
-                }
-                else if (flags & SWS_GAUSS)
-                {
-                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
-                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
-                }
-                else if (flags & SWS_SINC)
-                {
-                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
-                }
-                else if (flags & SWS_LANCZOS)
-                {
-                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
-                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
-                    if (floatd>p) coeff=0;
-                }
-                else if (flags & SWS_BILINEAR)
-                {
-                    coeff= (1<<30) - d;
-                    if (coeff<0) coeff=0;
-                    coeff *= fone >> 30;
-                }
-                else if (flags & SWS_SPLINE)
-                {
-                    double p=-2.196152422706632;
-                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
-                }
-                else {
-                    coeff= 0.0; //GCC warning killer
-                    assert(0);
-                }
-
-                filter[i*filterSize + j]= coeff;
-                xx++;
-            }
-            xDstInSrc+= 2*xInc;
-        }
-    }
-
-    /* apply src & dst Filter to filter -> filter2
-       av_free(filter);
-    */
-    assert(filterSize>0);
-    filter2Size= filterSize;
-    if (srcFilter) filter2Size+= srcFilter->length - 1;
-    if (dstFilter) filter2Size+= dstFilter->length - 1;
-    assert(filter2Size>0);
-    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
-
-    for (i=0; i<dstW; i++)
-    {
-        int j, k;
-
-        if(srcFilter){
-            for (k=0; k<srcFilter->length; k++){
-                for (j=0; j<filterSize; j++)
-                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
-            }
-        }else{
-            for (j=0; j<filterSize; j++)
-                filter2[i*filter2Size + j]= filter[i*filterSize + j];
-        }
-        //FIXME dstFilter
-
-        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
-    }
-    av_freep(&filter);
-
-    /* try to reduce the filter-size (step1 find size and shift left) */
-    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
-    minFilterSize= 0;
-    for (i=dstW-1; i>=0; i--)
-    {
-        int min= filter2Size;
-        int j;
-        int64_t cutOff=0.0;
-
-        /* get rid off near zero elements on the left by shifting left */
-        for (j=0; j<filter2Size; j++)
-        {
-            int k;
-            cutOff += FFABS(filter2[i*filter2Size]);
-
-            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
-
-            /* preserve monotonicity because the core can't handle the filter otherwise */
-            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
-
-            // move filter coefficients left
-            for (k=1; k<filter2Size; k++)
-                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
-            filter2[i*filter2Size + k - 1]= 0;
-            (*filterPos)[i]++;
-        }
-
-        cutOff=0;
-        /* count near zeros on the right */
-        for (j=filter2Size-1; j>0; j--)
-        {
-            cutOff += FFABS(filter2[i*filter2Size + j]);
-
-            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
-            min--;
-        }
-
-        if (min>minFilterSize) minFilterSize= min;
-    }
-
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
-        // we can handle the special case 4,
-        // so we don't want to go to the full 8
-        if (minFilterSize < 5)
-            filterAlign = 4;
-
-        // We really don't want to waste our time
-        // doing useless computation, so fall back on
-        // the scalar C code for very small filters.
-        // Vectorizing is worth it only if you have a
-        // decent-sized vector.
-        if (minFilterSize < 3)
-            filterAlign = 1;
-    }
-
-    if (flags & SWS_CPU_CAPS_MMX) {
-        // special case for unscaled vertical filtering
-        if (minFilterSize == 1 && filterAlign == 2)
-            filterAlign= 1;
-    }
-
-    assert(minFilterSize > 0);
-    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
-    assert(filterSize > 0);
-    filter= av_malloc(filterSize*dstW*sizeof(*filter));
-    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
-        goto error;
-    *outFilterSize= filterSize;
-
-    if (flags&SWS_PRINT_INFO)
-        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
-    /* try to reduce the filter-size (step2 reduce it) */
-    for (i=0; i<dstW; i++)
-    {
-        int j;
-
-        for (j=0; j<filterSize; j++)
-        {
-            if (j>=filter2Size) filter[i*filterSize + j]= 0;
-            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
-            if((flags & SWS_BITEXACT) && j>=minFilterSize)
-                filter[i*filterSize + j]= 0;
-        }
-    }
-
-
-    //FIXME try to align filterPos if possible
-
-    //fix borders
-    for (i=0; i<dstW; i++)
-    {
-        int j;
-        if ((*filterPos)[i] < 0)
-        {
-            // move filter coefficients left to compensate for filterPos
-            for (j=1; j<filterSize; j++)
-            {
-                int left= FFMAX(j + (*filterPos)[i], 0);
-                filter[i*filterSize + left] += filter[i*filterSize + j];
-                filter[i*filterSize + j]=0;
-            }
-            (*filterPos)[i]= 0;
-        }
-
-        if ((*filterPos)[i] + filterSize > srcW)
-        {
-            int shift= (*filterPos)[i] + filterSize - srcW;
-            // move filter coefficients right to compensate for filterPos
-            for (j=filterSize-2; j>=0; j--)
-            {
-                int right= FFMIN(j + shift, filterSize-1);
-                filter[i*filterSize +right] += filter[i*filterSize +j];
-                filter[i*filterSize +j]=0;
-            }
-            (*filterPos)[i]= srcW - filterSize;
-        }
-    }
-
-    // Note the +1 is for the MMX scaler which reads over the end
-    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
-    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
-
-    /* normalize & store in outFilter */
-    for (i=0; i<dstW; i++)
-    {
-        int j;
-        int64_t error=0;
-        int64_t sum=0;
-
-        for (j=0; j<filterSize; j++)
-        {
-            sum+= filter[i*filterSize + j];
-        }
-        sum= (sum + one/2)/ one;
-        for (j=0; j<*outFilterSize; j++)
-        {
-            int64_t v= filter[i*filterSize + j] + error;
-            int intV= ROUNDED_DIV(v, sum);
-            (*outFilter)[i*(*outFilterSize) + j]= intV;
-            error= v - intV*sum;
-        }
-    }
-
-    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
-    for (i=0; i<*outFilterSize; i++)
-    {
-        int j= dstW*(*outFilterSize);
-        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
-    }
-
-    ret=0;
-error:
-    av_free(filter);
-    av_free(filter2);
-    return ret;
-}
-
-#ifdef COMPILE_MMX2
-static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
-{
-    uint8_t *fragmentA;
-    long imm8OfPShufW1A;
-    long imm8OfPShufW2A;
-    long fragmentLengthA;
-    uint8_t *fragmentB;
-    long imm8OfPShufW1B;
-    long imm8OfPShufW2B;
-    long fragmentLengthB;
-    int fragmentPos;
-
-    int xpos, i;
-
-    // create an optimized horizontal scaling routine
-
-    //code fragment
-
-    __asm__ volatile(
-        "jmp                         9f                 \n\t"
-    // Begin
-        "0:                                             \n\t"
-        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
-        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
-        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
-        "punpcklbw                %%mm7, %%mm1          \n\t"
-        "punpcklbw                %%mm7, %%mm0          \n\t"
-        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
-        "1:                                             \n\t"
-        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
-        "2:                                             \n\t"
-        "psubw                    %%mm1, %%mm0          \n\t"
-        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
-        "pmullw                   %%mm3, %%mm0          \n\t"
-        "psllw                       $7, %%mm1          \n\t"
-        "paddw                    %%mm1, %%mm0          \n\t"
-
-        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
-
-        "add                         $8, %%"REG_a"      \n\t"
-    // End
-        "9:                                             \n\t"
-//        "int $3                                         \n\t"
-        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
-        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
-        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
-        "dec                         %1                 \n\t"
-        "dec                         %2                 \n\t"
-        "sub                         %0, %1             \n\t"
-        "sub                         %0, %2             \n\t"
-        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
-        "sub                         %0, %3             \n\t"
-
-
-        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
-        "=r" (fragmentLengthA)
-    );
-
-    __asm__ volatile(
-        "jmp                         9f                 \n\t"
-    // Begin
-        "0:                                             \n\t"
-        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
-        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
-        "punpcklbw                %%mm7, %%mm0          \n\t"
-        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
-        "1:                                             \n\t"
-        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
-        "2:                                             \n\t"
-        "psubw                    %%mm1, %%mm0          \n\t"
-        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
-        "pmullw                   %%mm3, %%mm0          \n\t"
-        "psllw                       $7, %%mm1          \n\t"
-        "paddw                    %%mm1, %%mm0          \n\t"
-
-        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
-
-        "add                         $8, %%"REG_a"      \n\t"
-    // End
-        "9:                                             \n\t"
-//        "int                       $3                   \n\t"
-        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
-        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
-        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
-        "dec                         %1                 \n\t"
-        "dec                         %2                 \n\t"
-        "sub                         %0, %1             \n\t"
-        "sub                         %0, %2             \n\t"
-        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
-        "sub                         %0, %3             \n\t"
-
-
-        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
-        "=r" (fragmentLengthB)
-    );
-
-    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
-    fragmentPos=0;
-
-    for (i=0; i<dstW/numSplits; i++)
-    {
-        int xx=xpos>>16;
-
-        if ((i&3) == 0)
-        {
-            int a=0;
-            int b=((xpos+xInc)>>16) - xx;
-            int c=((xpos+xInc*2)>>16) - xx;
-            int d=((xpos+xInc*3)>>16) - xx;
-
-            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
-            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
-            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
-            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
-            filterPos[i/2]= xx;
-
-            if (d+1<4)
-            {
-                int maxShift= 3-(d+1);
-                int shift=0;
-
-                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
-
-                funnyCode[fragmentPos + imm8OfPShufW1B]=
-                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
-                funnyCode[fragmentPos + imm8OfPShufW2B]=
-                    a | (b<<2) | (c<<4) | (d<<6);
-
-                if (i+3>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
-
-                if (shift && i>=shift)
-                {
-                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
-                    filterPos[i/2]-=shift;
-                }
-
-                fragmentPos+= fragmentLengthB;
-            }
-            else
-            {
-                int maxShift= 3-d;
-                int shift=0;
-
-                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
-
-                funnyCode[fragmentPos + imm8OfPShufW1A]=
-                funnyCode[fragmentPos + imm8OfPShufW2A]=
-                    a | (b<<2) | (c<<4) | (d<<6);
-
-                if (i+4>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
-
-                if (shift && i>=shift)
-                {
-                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
-                    filterPos[i/2]-=shift;
-                }
-
-                fragmentPos+= fragmentLengthA;
-            }
-
-            funnyCode[fragmentPos]= RET;
-        }
-        xpos+=xInc;
-    }
-    filterPos[i/2]= xpos>>16; // needed to jump to the next part
-}
-#endif /* COMPILE_MMX2 */
-
-static void globalInit(void){
-    // generating tables:
-    int i;
-    for (i=0; i<768; i++){
-        int c= av_clip_uint8(i-256);
-        clip_table[i]=c;
-    }
-}
-
-static SwsFunc getSwsFunc(int flags){
-
-#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
-#if ARCH_X86
-    // ordered per speed fastest first
-    if (flags & SWS_CPU_CAPS_MMX2)
-        return swScale_MMX2;
-    else if (flags & SWS_CPU_CAPS_3DNOW)
-        return swScale_3DNow;
-    else if (flags & SWS_CPU_CAPS_MMX)
-        return swScale_MMX;
-    else
-        return swScale_C;
-
-#else
-#if ARCH_PPC
-    if (flags & SWS_CPU_CAPS_ALTIVEC)
-        return swScale_altivec;
-    else
-        return swScale_C;
-#endif
-    return swScale_C;
-#endif /* ARCH_X86 */
-#else //RUNTIME_CPUDETECT
-#if   HAVE_MMX2
-    return swScale_MMX2;
-#elif HAVE_AMD3DNOW
-    return swScale_3DNow;
-#elif HAVE_MMX
-    return swScale_MMX;
-#elif HAVE_ALTIVEC
-    return swScale_altivec;
-#else
-    return swScale_C;
-#endif
-#endif //!RUNTIME_CPUDETECT
-}
-
-static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-    /* Copy Y plane */
-    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
-        memcpy(dst, src[0], srcSliceH*dstStride[0]);
-    else
-    {
-        int i;
-        uint8_t *srcPtr= src[0];
-        uint8_t *dstPtr= dst;
-        for (i=0; i<srcSliceH; i++)
-        {
-            memcpy(dstPtr, srcPtr, c->srcW);
-            srcPtr+= srcStride[0];
-            dstPtr+= dstStride[0];
-        }
-    }
-    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
-    if (c->dstFormat == PIX_FMT_NV12)
-        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
-    else
-        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
-    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
-
-    yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
-
-    return srcSliceH;
-}
-
-static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                          int srcSliceH, uint8_t* dst[], int dstStride[]){
-    const enum PixelFormat srcFormat= c->srcFormat;
-    const enum PixelFormat dstFormat= c->dstFormat;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
-                 const uint8_t *palette)=NULL;
-    int i;
-    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
-    uint8_t *srcPtr= src[0];
-
-    if (!usePal(srcFormat))
-        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
-
-    switch(dstFormat){
-    case PIX_FMT_RGB32  : conv = palette8topacked32; break;
-    case PIX_FMT_BGR32  : conv = palette8topacked32; break;
-    case PIX_FMT_BGR32_1: conv = palette8topacked32; break;
-    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
-    case PIX_FMT_RGB24  : conv = palette8topacked24; break;
-    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
-    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
-    }
-
-
-    for (i=0; i<srcSliceH; i++) {
-        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
-        srcPtr+= srcStride[0];
-        dstPtr+= dstStride[0];
-    }
-
-    return srcSliceH;
-}
-
-/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
-static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                          int srcSliceH, uint8_t* dst[], int dstStride[]){
-    const enum PixelFormat srcFormat= c->srcFormat;
-    const enum PixelFormat dstFormat= c->dstFormat;
-    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
-    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
-    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
-    const int dstId= fmt_depth(dstFormat) >> 2;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
-
-    /* BGR -> BGR */
-    if (  (isBGR(srcFormat) && isBGR(dstFormat))
-       || (isRGB(srcFormat) && isRGB(dstFormat))){
-        switch(srcId | (dstId<<4)){
-        case 0x34: conv= rgb16to15; break;
-        case 0x36: conv= rgb24to15; break;
-        case 0x38: conv= rgb32to15; break;
-        case 0x43: conv= rgb15to16; break;
-        case 0x46: conv= rgb24to16; break;
-        case 0x48: conv= rgb32to16; break;
-        case 0x63: conv= rgb15to24; break;
-        case 0x64: conv= rgb16to24; break;
-        case 0x68: conv= rgb32to24; break;
-        case 0x83: conv= rgb15to32; break;
-        case 0x84: conv= rgb16to32; break;
-        case 0x86: conv= rgb24to32; break;
-        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
-        }
-    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
-             || (isRGB(srcFormat) && isBGR(dstFormat))){
-        switch(srcId | (dstId<<4)){
-        case 0x33: conv= rgb15tobgr15; break;
-        case 0x34: conv= rgb16tobgr15; break;
-        case 0x36: conv= rgb24tobgr15; break;
-        case 0x38: conv= rgb32tobgr15; break;
-        case 0x43: conv= rgb15tobgr16; break;
-        case 0x44: conv= rgb16tobgr16; break;
-        case 0x46: conv= rgb24tobgr16; break;
-        case 0x48: conv= rgb32tobgr16; break;
-        case 0x63: conv= rgb15tobgr24; break;
-        case 0x64: conv= rgb16tobgr24; break;
-        case 0x66: conv= rgb24tobgr24; break;
-        case 0x68: conv= rgb32tobgr24; break;
-        case 0x83: conv= rgb15tobgr32; break;
-        case 0x84: conv= rgb16tobgr32; break;
-        case 0x86: conv= rgb24tobgr32; break;
-        case 0x88: conv= rgb32tobgr32; break;
-        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
-        }
-    }else{
-        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
-    }
-
-    if(conv)
-    {
-        uint8_t *srcPtr= src[0];
-        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
-            srcPtr += ALT32_CORR;
-
-        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
-            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
-        else
-        {
-            int i;
-            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
-
-            for (i=0; i<srcSliceH; i++)
-            {
-                conv(srcPtr, dstPtr, c->srcW*srcBpp);
-                srcPtr+= srcStride[0];
-                dstPtr+= dstStride[0];
-            }
-        }
-    }
-    return srcSliceH;
-}
-
-static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                              int srcSliceH, uint8_t* dst[], int dstStride[]){
-
-    rgb24toyv12(
-        src[0],
-        dst[0]+ srcSliceY    *dstStride[0],
-        dst[1]+(srcSliceY>>1)*dstStride[1],
-        dst[2]+(srcSliceY>>1)*dstStride[2],
-        c->srcW, srcSliceH,
-        dstStride[0], dstStride[1], srcStride[0]);
-    return srcSliceH;
-}
-
-static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                             int srcSliceH, uint8_t* dst[], int dstStride[]){
-    int i;
-
-    /* copy Y */
-    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
-        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
-    else{
-        uint8_t *srcPtr= src[0];
-        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
-
-        for (i=0; i<srcSliceH; i++)
-        {
-            memcpy(dstPtr, srcPtr, c->srcW);
-            srcPtr+= srcStride[0];
-            dstPtr+= dstStride[0];
-        }
-    }
-
-    if (c->dstFormat==PIX_FMT_YUV420P){
-        planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
-        planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
-    }else{
-        planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
-        planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
-    }
-    return srcSliceH;
-}
-
-/* unscaled copy like stuff (assumes nearly identical formats) */
-static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                      int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
-        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
-    else
-    {
-        int i;
-        uint8_t *srcPtr= src[0];
-        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
-        int length=0;
-
-        /* universal length finder */
-        while(length+c->srcW <= FFABS(dstStride[0])
-           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
-        assert(length!=0);
-
-        for (i=0; i<srcSliceH; i++)
-        {
-            memcpy(dstPtr, srcPtr, length);
-            srcPtr+= srcStride[0];
-            dstPtr+= dstStride[0];
-        }
-    }
-    return srcSliceH;
-}
-
-static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                      int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    int plane;
-    for (plane=0; plane<3; plane++)
-    {
-        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
-        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
-        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
-
-        if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
-        {
-            if (!isGray(c->dstFormat))
-                memset(dst[plane], 128, dstStride[plane]*height);
-        }
-        else
-        {
-            if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
-                memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
-            else
-            {
-                int i;
-                uint8_t *srcPtr= src[plane];
-                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
-                for (i=0; i<height; i++)
-                {
-                    memcpy(dstPtr, srcPtr, length);
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
-                }
-            }
-        }
-    }
-    return srcSliceH;
-}
-
-static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                        int srcSliceH, uint8_t* dst[], int dstStride[]){
-
-    int length= c->srcW;
-    int y=      srcSliceY;
-    int height= srcSliceH;
-    int i, j;
-    uint8_t *srcPtr= src[0];
-    uint8_t *dstPtr= dst[0] + dstStride[0]*y;
-
-    if (!isGray(c->dstFormat)){
-        int height= -((-srcSliceH)>>c->chrDstVSubSample);
-        memset(dst[1], 128, dstStride[1]*height);
-        memset(dst[2], 128, dstStride[2]*height);
-    }
-    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
-    for (i=0; i<height; i++)
-    {
-        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
-        srcPtr+= srcStride[0];
-        dstPtr+= dstStride[0];
-    }
-    return srcSliceH;
-}
-
-static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                        int srcSliceH, uint8_t* dst[], int dstStride[]){
-
-    int length= c->srcW;
-    int y=      srcSliceY;
-    int height= srcSliceH;
-    int i, j;
-    uint8_t *srcPtr= src[0];
-    uint8_t *dstPtr= dst[0] + dstStride[0]*y;
-    for (i=0; i<height; i++)
-    {
-        for (j=0; j<length; j++)
-        {
-            dstPtr[j<<1] = srcPtr[j];
-            dstPtr[(j<<1)+1] = srcPtr[j];
-        }
-        srcPtr+= srcStride[0];
-        dstPtr+= dstStride[0];
-    }
-    return srcSliceH;
-}
-
-static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                      int srcSliceH, uint8_t* dst[], int dstStride[]){
-
-    int length= c->srcW;
-    int y=      srcSliceY;
-    int height= srcSliceH;
-    int i, j;
-    uint16_t *srcPtr= (uint16_t*)src[0];
-    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y/2);
-    for (i=0; i<height; i++)
-    {
-        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
-        srcPtr+= srcStride[0]/2;
-        dstPtr+= dstStride[0]/2;
-    }
-    return srcSliceH;
-}
-
-
-static void getSubSampleFactors(int *h, int *v, int format){
-    switch(format){
-    case PIX_FMT_UYVY422:
-    case PIX_FMT_YUYV422:
-        *h=1;
-        *v=0;
-        break;
-    case PIX_FMT_YUV420P:
-    case PIX_FMT_YUVA420P:
-    case PIX_FMT_GRAY16BE:
-    case PIX_FMT_GRAY16LE:
-    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
-    case PIX_FMT_NV12:
-    case PIX_FMT_NV21:
-        *h=1;
-        *v=1;
-        break;
-    case PIX_FMT_YUV440P:
-        *h=0;
-        *v=1;
-        break;
-    case PIX_FMT_YUV410P:
-        *h=2;
-        *v=2;
-        break;
-    case PIX_FMT_YUV444P:
-        *h=0;
-        *v=0;
-        break;
-    case PIX_FMT_YUV422P:
-        *h=1;
-        *v=0;
-        break;
-    case PIX_FMT_YUV411P:
-        *h=2;
-        *v=0;
-        break;
-    default:
-        *h=0;
-        *v=0;
-        break;
-    }
-}
-
-static uint16_t roundToInt16(int64_t f){
-    int r= (f + (1<<15))>>16;
-         if (r<-0x7FFF) return 0x8000;
-    else if (r> 0x7FFF) return 0x7FFF;
-    else                return r;
-}
-
-/**
- * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
- * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
- * @return -1 if not supported
- */
-int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
-    int64_t crv =  inv_table[0];
-    int64_t cbu =  inv_table[1];
-    int64_t cgu = -inv_table[2];
-    int64_t cgv = -inv_table[3];
-    int64_t cy  = 1<<16;
-    int64_t oy  = 0;
-
-    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
-    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
-
-    c->brightness= brightness;
-    c->contrast  = contrast;
-    c->saturation= saturation;
-    c->srcRange  = srcRange;
-    c->dstRange  = dstRange;
-    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0;
-
-    c->uOffset=   0x0400040004000400LL;
-    c->vOffset=   0x0400040004000400LL;
-
-    if (!srcRange){
-        cy= (cy*255) / 219;
-        oy= 16<<16;
-    }else{
-        crv= (crv*224) / 255;
-        cbu= (cbu*224) / 255;
-        cgu= (cgu*224) / 255;
-        cgv= (cgv*224) / 255;
-    }
-
-    cy = (cy *contrast             )>>16;
-    crv= (crv*contrast * saturation)>>32;
-    cbu= (cbu*contrast * saturation)>>32;
-    cgu= (cgu*contrast * saturation)>>32;
-    cgv= (cgv*contrast * saturation)>>32;
-
-    oy -= 256*brightness;
-
-    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
-    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
-    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
-    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
-    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
-    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
-
-    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
-    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
-    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
-    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
-    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
-    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
-
-    sws_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
-    //FIXME factorize
-
-#ifdef COMPILE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
-        sws_yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
-#endif
-    return 0;
-}
-
-/**
- * @return -1 if not supported
- */
-int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
-    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
-
-    *inv_table = c->srcColorspaceTable;
-    *table     = c->dstColorspaceTable;
-    *srcRange  = c->srcRange;
-    *dstRange  = c->dstRange;
-    *brightness= c->brightness;
-    *contrast  = c->contrast;
-    *saturation= c->saturation;
-
-    return 0;
-}
-
-static int handle_jpeg(enum PixelFormat *format)
-{
-    switch (*format) {
-        case PIX_FMT_YUVJ420P:
-            *format = PIX_FMT_YUV420P;
-            return 1;
-        case PIX_FMT_YUVJ422P:
-            *format = PIX_FMT_YUV422P;
-            return 1;
-        case PIX_FMT_YUVJ444P:
-            *format = PIX_FMT_YUV444P;
-            return 1;
-        case PIX_FMT_YUVJ440P:
-            *format = PIX_FMT_YUV440P;
-            return 1;
-        default:
-            return 0;
-    }
-}
-
-SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
-                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
-
-    SwsContext *c;
-    int i;
-    int usesVFilter, usesHFilter;
-    int unscaled, needsDither;
-    int srcRange, dstRange;
-    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
-#if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
-        __asm__ volatile("emms\n\t"::: "memory");
-#endif
-
-#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
-    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
-#if   HAVE_MMX2
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
-#elif HAVE_AMD3DNOW
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
-#elif HAVE_MMX
-    flags |= SWS_CPU_CAPS_MMX;
-#elif HAVE_ALTIVEC
-    flags |= SWS_CPU_CAPS_ALTIVEC;
-#elif ARCH_BFIN
-    flags |= SWS_CPU_CAPS_BFIN;
-#endif
-#endif /* RUNTIME_CPUDETECT */
-    if (clip_table[512] != 255) globalInit();
-    if (!rgb15to16) sws_rgb2rgb_init(flags);
-
-    unscaled = (srcW == dstW && srcH == dstH);
-    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
-        && (fmt_depth(dstFormat))<24
-        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
-
-    srcRange = handle_jpeg(&srcFormat);
-    dstRange = handle_jpeg(&dstFormat);
-
-    if (!isSupportedIn(srcFormat))
-    {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
-        return NULL;
-    }
-    if (!isSupportedOut(dstFormat))
-    {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
-        return NULL;
-    }
-
-    i= flags & ( SWS_POINT
-                |SWS_AREA
-                |SWS_BILINEAR
-                |SWS_FAST_BILINEAR
-                |SWS_BICUBIC
-                |SWS_X
-                |SWS_GAUSS
-                |SWS_LANCZOS
-                |SWS_SINC
-                |SWS_SPLINE
-                |SWS_BICUBLIN);
-    if(!i || (i & (i-1)))
-    {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
-        return NULL;
-    }
-
-    /* sanity check */
-    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
-    {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
-               srcW, srcH, dstW, dstH);
-        return NULL;
-    }
-    if(srcW > VOFW || dstW > VOFW){
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
-        return NULL;
-    }
-
-    if (!dstFilter) dstFilter= &dummyFilter;
-    if (!srcFilter) srcFilter= &dummyFilter;
-
-    c= av_mallocz(sizeof(SwsContext));
-
-    c->av_class = &sws_context_class;
-    c->srcW= srcW;
-    c->srcH= srcH;
-    c->dstW= dstW;
-    c->dstH= dstH;
-    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
-    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
-    c->flags= flags;
-    c->dstFormat= dstFormat;
-    c->srcFormat= srcFormat;
-    c->vRounder= 4* 0x0001000100010001ULL;
-
-    usesHFilter= usesVFilter= 0;
-    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
-    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
-    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
-    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
-    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
-    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
-    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
-    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
-
-    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
-    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
-
-    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
-    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
-
-    // drop some chroma lines if the user wants it
-    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
-    c->chrSrcVSubSample+= c->vChrDrop;
-
-    // drop every other pixel for chroma calculation unless user wants full chroma
-    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
-      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
-      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
-      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
-      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
-        c->chrSrcHSubSample=1;
-
-    if (param){
-        c->param[0] = param[0];
-        c->param[1] = param[1];
-    }else{
-        c->param[0] =
-        c->param[1] = SWS_PARAM_DEFAULT;
-    }
-
-    c->chrIntHSubSample= c->chrDstHSubSample;
-    c->chrIntVSubSample= c->chrSrcVSubSample;
-
-    // Note the -((-x)>>y) is so that we always round toward +inf.
-    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
-    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
-    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
-    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
-
-    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
-
-    /* unscaled special cases */
-    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
-    {
-        /* yv12_to_nv12 */
-        if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
-        {
-            c->swScale= PlanarToNV12Wrapper;
-        }
-        /* yuv2bgr */
-        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
-            && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
-        {
-            c->swScale= sws_yuv2rgb_get_func_ptr(c);
-        }
-
-        if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_BITEXACT))
-        {
-            c->swScale= yvu9toyv12Wrapper;
-        }
-
-        /* bgr24toYV12 */
-        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
-            c->swScale= bgr24toyv12Wrapper;
-
-        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
-        if (  (isBGR(srcFormat) || isRGB(srcFormat))
-           && (isBGR(dstFormat) || isRGB(dstFormat))
-           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
-           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
-           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
-           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
-           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
-           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
-           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
-           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
-                                             && dstFormat != PIX_FMT_RGB32_1
-                                             && dstFormat != PIX_FMT_BGR32_1
-           && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
-             c->swScale= rgb2rgbWrapper;
-
-        if ((usePal(srcFormat) && (
-                 dstFormat == PIX_FMT_RGB32   ||
-                 dstFormat == PIX_FMT_RGB32_1 ||
-                 dstFormat == PIX_FMT_RGB24   ||
-                 dstFormat == PIX_FMT_BGR32   ||
-                 dstFormat == PIX_FMT_BGR32_1 ||
-                 dstFormat == PIX_FMT_BGR24)))
-             c->swScale= pal2rgbWrapper;
-
-        if (srcFormat == PIX_FMT_YUV422P)
-        {
-            if (dstFormat == PIX_FMT_YUYV422)
-                c->swScale= YUV422PToYuy2Wrapper;
-            else if (dstFormat == PIX_FMT_UYVY422)
-                c->swScale= YUV422PToUyvyWrapper;
-        }
-
-        /* LQ converters if -sws 0 or -sws 4*/
-        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
-            /* yv12_to_yuy2 */
-            if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P)
-            {
-                if (dstFormat == PIX_FMT_YUYV422)
-                    c->swScale= PlanarToYuy2Wrapper;
-                else if (dstFormat == PIX_FMT_UYVY422)
-                    c->swScale= PlanarToUyvyWrapper;
-            }
-        }
-
-#ifdef COMPILE_ALTIVEC
-        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
-            !(c->flags & SWS_BITEXACT) &&
-            srcFormat == PIX_FMT_YUV420P) {
-          // unscaled YV12 -> packed YUV, we want speed
-          if (dstFormat == PIX_FMT_YUYV422)
-              c->swScale= yv12toyuy2_unscaled_altivec;
-          else if (dstFormat == PIX_FMT_UYVY422)
-              c->swScale= yv12touyvy_unscaled_altivec;
-        }
-#endif
-
-        /* simple copy */
-        if (  srcFormat == dstFormat
-            || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
-            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
-            || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
-        {
-            if (isPacked(c->srcFormat))
-                c->swScale= packedCopy;
-            else /* Planar YUV or gray */
-                c->swScale= planarCopy;
-        }
-
-        /* gray16{le,be} conversions */
-        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
-        {
-            c->swScale= gray16togray;
-        }
-        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
-        {
-            c->swScale= graytogray16;
-        }
-        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
-        {
-            c->swScale= gray16swap;
-        }
-
-#if ARCH_BFIN
-        if (flags & SWS_CPU_CAPS_BFIN)
-            ff_bfin_get_unscaled_swscale (c);
-#endif
-
-        if (c->swScale){
-            if (flags&SWS_PRINT_INFO)
-                av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
-                                sws_format_name(srcFormat), sws_format_name(dstFormat));
-            return c;
-        }
-    }
-
-    if (flags & SWS_CPU_CAPS_MMX2)
-    {
-        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
-        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
-        {
-            if (flags&SWS_PRINT_INFO)
-                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
-        }
-        if (usesHFilter) c->canMMX2BeUsed=0;
-    }
-    else
-        c->canMMX2BeUsed=0;
-
-    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
-    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
-
-    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
-    // but only for the FAST_BILINEAR mode otherwise do correct scaling
-    // n-2 is the last chrominance sample available
-    // this is not perfect, but no one should notice the difference, the more correct variant
-    // would be like the vertical one, but that would require some special code for the
-    // first and last pixel
-    if (flags&SWS_FAST_BILINEAR)
-    {
-        if (c->canMMX2BeUsed)
-        {
-            c->lumXInc+= 20;
-            c->chrXInc+= 20;
-        }
-        //we don't use the x86 asm scaler if MMX is available
-        else if (flags & SWS_CPU_CAPS_MMX)
-        {
-            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
-            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
-        }
-    }
-
-    /* precalculate horizontal scaler filter coefficients */
-    {
-        const int filterAlign=
-            (flags & SWS_CPU_CAPS_MMX) ? 4 :
-            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
-            1;
-
-        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
-                   srcW      ,       dstW, filterAlign, 1<<14,
-                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
-                   srcFilter->lumH, dstFilter->lumH, c->param);
-        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
-                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
-                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
-                   srcFilter->chrH, dstFilter->chrH, c->param);
-
-#define MAX_FUNNY_CODE_SIZE 10000
-#if defined(COMPILE_MMX2)
-// can't downscale !!!
-        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
-        {
-#ifdef MAP_ANONYMOUS
-            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-#else
-            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
-            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
-#endif
-
-            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
-            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
-            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
-            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
-
-            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
-            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
-        }
-#endif /* defined(COMPILE_MMX2) */
-    } // initialize horizontal stuff
-
-
-
-    /* precalculate vertical scaler filter coefficients */
-    {
-        const int filterAlign=
-            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
-            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
-            1;
-
-        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
-                   srcH      ,        dstH, filterAlign, (1<<12),
-                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
-                   srcFilter->lumV, dstFilter->lumV, c->param);
-        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
-                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
-                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
-                   srcFilter->chrV, dstFilter->chrV, c->param);
-
-#if HAVE_ALTIVEC
-        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
-        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
-
-        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
-            int j;
-            short *p = (short *)&c->vYCoeffsBank[i];
-            for (j=0;j<8;j++)
-                p[j] = c->vLumFilter[i];
-        }
-
-        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
-            int j;
-            short *p = (short *)&c->vCCoeffsBank[i];
-            for (j=0;j<8;j++)
-                p[j] = c->vChrFilter[i];
-        }
-#endif
-    }
-
-    // calculate buffer sizes so that they won't run out while handling these damn slices
-    c->vLumBufSize= c->vLumFilterSize;
-    c->vChrBufSize= c->vChrFilterSize;
-    for (i=0; i<dstH; i++)
-    {
-        int chrI= i*c->chrDstH / dstH;
-        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
-                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
-
-        nextSlice>>= c->chrSrcVSubSample;
-        nextSlice<<= c->chrSrcVSubSample;
-        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
-            c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
-        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
-            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
-    }
-
-    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
-    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
-    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
-    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
-    /* align at 16 bytes for AltiVec */
-    for (i=0; i<c->vLumBufSize; i++)
-        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
-    for (i=0; i<c->vChrBufSize; i++)
-        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
-
-    //try to avoid drawing green stuff between the right end and the stride end
-    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
-
-    assert(2*VOFW == VOF);
-
-    assert(c->chrDstH <= dstH);
-
-    if (flags&SWS_PRINT_INFO)
-    {
-#ifdef DITHER1XBPP
-        const char *dither= " dithered";
-#else
-        const char *dither= "";
-#endif
-        if (flags&SWS_FAST_BILINEAR)
-            av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
-        else if (flags&SWS_BILINEAR)
-            av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
-        else if (flags&SWS_BICUBIC)
-            av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
-        else if (flags&SWS_X)
-            av_log(c, AV_LOG_INFO, "Experimental scaler, ");
-        else if (flags&SWS_POINT)
-            av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
-        else if (flags&SWS_AREA)
-            av_log(c, AV_LOG_INFO, "Area Averageing scaler, ");
-        else if (flags&SWS_BICUBLIN)
-            av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
-        else if (flags&SWS_GAUSS)
-            av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
-        else if (flags&SWS_SINC)
-            av_log(c, AV_LOG_INFO, "Sinc scaler, ");
-        else if (flags&SWS_LANCZOS)
-            av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
-        else if (flags&SWS_SPLINE)
-            av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
-        else
-            av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
-
-        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
-            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
-                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
-        else
-            av_log(c, AV_LOG_INFO, "from %s to %s ",
-                   sws_format_name(srcFormat), sws_format_name(dstFormat));
-
-        if (flags & SWS_CPU_CAPS_MMX2)
-            av_log(c, AV_LOG_INFO, "using MMX2\n");
-        else if (flags & SWS_CPU_CAPS_3DNOW)
-            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
-        else if (flags & SWS_CPU_CAPS_MMX)
-            av_log(c, AV_LOG_INFO, "using MMX\n");
-        else if (flags & SWS_CPU_CAPS_ALTIVEC)
-            av_log(c, AV_LOG_INFO, "using AltiVec\n");
-        else
-            av_log(c, AV_LOG_INFO, "using C\n");
-    }
-
-    if (flags & SWS_PRINT_INFO)
-    {
-        if (flags & SWS_CPU_CAPS_MMX)
-        {
-            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
-                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
-            else
-            {
-                if (c->hLumFilterSize==4)
-                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
-                else if (c->hLumFilterSize==8)
-                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal luminance scaling\n");
-                else
-                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal luminance scaling\n");
-
-                if (c->hChrFilterSize==4)
-                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
-                else if (c->hChrFilterSize==8)
-                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
-                else
-                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
-            }
-        }
-        else
-        {
-#if ARCH_X86
-            av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
-#else
-            if (flags & SWS_FAST_BILINEAR)
-                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
-            else
-                av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
-#endif
-        }
-        if (isPlanarYUV(dstFormat))
-        {
-            if (c->vLumFilterSize==1)
-                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-            else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-        }
-        else
-        {
-            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
-                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
-                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
-                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-            else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-        }
-
-        if (dstFormat==PIX_FMT_BGR24)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
-                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
-        else if (dstFormat==PIX_FMT_RGB32)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-        else if (dstFormat==PIX_FMT_BGR565)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-        else if (dstFormat==PIX_FMT_BGR555)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
-
-        av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
-    }
-    if (flags & SWS_PRINT_INFO)
-    {
-        av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
-               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
-        av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
-               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
-    }
-
-    c->swScale= getSwsFunc(flags);
-    return c;
-}
-
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- *
- * On the first call for a context (c->sliceDir == 0) the slice direction is
- * latched: top-down if the slice starts at row 0, bottom-up otherwise.
- * For sources where usePal() is true the 256-entry lookup tables
- * c->pal_yuv and c->pal_rgb are rebuilt on every call.
- * The actual work is done by c->swScale, which receives private copies of
- * the plane pointers and strides.
- *
- * @return 0 (after logging an error) if the direction is still unknown and
- *         the slice neither starts at row 0 nor ends at the last source row;
- *         otherwise the value returned by c->swScale
- */
-int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-              int srcSliceH, uint8_t* dst[], int dstStride[]){
-    int i;
-    /* private copy of the source plane pointers; adjusted below for bottom-up slices */
-    uint8_t* src2[4]= {src[0], src[1], src[2]};
-
-    /* direction not known yet: the first slice has to touch the top or the bottom edge */
-    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
-        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
-        return 0;
-    }
-    if (c->sliceDir == 0) {
-        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
-    }
-
-    if (usePal(c->srcFormat)){
-        /* derive an (r,g,b) triple for every possible 8-bit pixel value */
-        for (i=0; i<256; i++){
-            int p, r, g, b,y,u,v;
-            if(c->srcFormat == PIX_FMT_PAL8){
-                /* the palette is read as 32-bit words from the second source plane */
-                p=((uint32_t*)(src[1]))[i];
-                r= (p>>16)&0xFF;
-                g= (p>> 8)&0xFF;
-                b=  p     &0xFF;
-            }else if(c->srcFormat == PIX_FMT_RGB8){
-                /* 3-3-2 bit fields scaled up to roughly 0..255 */
-                r= (i>>5    )*36;
-                g= ((i>>2)&7)*36;
-                b= (i&3     )*85;
-            }else if(c->srcFormat == PIX_FMT_BGR8){
-                b= (i>>6    )*85;
-                g= ((i>>3)&7)*36;
-                r= (i&7     )*36;
-            }else if(c->srcFormat == PIX_FMT_RGB4_BYTE){
-                /* 1-2-1 bit fields */
-                r= (i>>3    )*255;
-                g= ((i>>1)&3)*85;
-                b= (i&1     )*255;
-            }else {
-                assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
-                b= (i>>3    )*255;
-                g= ((i>>1)&3)*85;
-                r= (i&1     )*255;
-            }
-            /* fixed-point RGB->YUV, clipped to 8 bits and packed as y | u<<8 | v<<16 */
-            y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            c->pal_yuv[i]= y + (u<<8) + (v<<16);
-
-
-            /* pack r,g,b in the byte order selected by the destination format
-             * (the 24-bit cases additionally depend on host endianness) */
-            switch(c->dstFormat) {
-            case PIX_FMT_BGR32:
-#ifndef WORDS_BIGENDIAN
-            case PIX_FMT_RGB24:
-#endif
-                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
-                break;
-            case PIX_FMT_BGR32_1:
-#ifdef  WORDS_BIGENDIAN
-            case PIX_FMT_BGR24:
-#endif
-                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
-                break;
-            case PIX_FMT_RGB32_1:
-#ifdef  WORDS_BIGENDIAN
-            case PIX_FMT_RGB24:
-#endif
-                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
-                break;
-            case PIX_FMT_RGB32:
-#ifndef WORDS_BIGENDIAN
-            case PIX_FMT_BGR24:
-#endif
-            default:
-                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
-            }
-        }
-    }
-
-    // copy strides, so they can safely be modified
-    if (c->sliceDir == 1) {
-        // slices go from top to bottom
-        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
-        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
-        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
-    } else {
-        // slices go from bottom to top => we flip the image internally
-        /* destination pointers start at the last row of each plane ... */
-        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
-                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
-                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
-        /* ... and all strides are negated so that rows are walked upwards */
-        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
-        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
-
-        /* move the source pointers to the last row of the slice */
-        src2[0] += (srcSliceH-1)*srcStride[0];
-        /* src[1] is left alone for usePal() sources (for PAL8 it is read as the palette above) */
-        if (!usePal(c->srcFormat))
-            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
-        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
-
-        /* the slice position is mirrored to match the flipped coordinate system */
-        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
-    }
-}
-
-#if LIBSWSCALE_VERSION_MAJOR < 1
-/**
- * Compatibility entry point, only compiled while LIBSWSCALE_VERSION_MAJOR < 1.
- * Forwards every argument unchanged to sws_scale() and returns its result.
- */
-int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                      int srcSliceH, uint8_t* dst[], int dstStride[]){
-    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
-}
-#endif
-
-/**
- * Allocates a SwsFilter and fills its four vectors from the given settings.
- *
- * For luma and chroma separately: a non-zero blur value selects a Gaussian
- * vector (sws_getGaussianVec(blur, 3.0)), otherwise an identity vector is
- * used. A non-zero sharpen value turns the vector into
- * identity - sharpen*vector. The chroma vectors are then shifted by the
- * rounded chromaHShift / chromaVShift, and finally all four vectors are
- * normalized to a coefficient sum of 1.0.
- *
- * @param verbose if non-zero, the horizontal chroma and luma vectors are
- *                printed with sws_printVec2() at AV_LOG_DEBUG level
- * @return the new filter (release it with sws_freeFilter()), or NULL if an
- *         allocation failed
- */
-SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
-                                float lumaSharpen, float chromaSharpen,
-                                float chromaHShift, float chromaVShift,
-                                int verbose)
-{
-    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
-
-    if (!filter)
-        return NULL;
-
-    if (lumaGBlur!=0.0){
-        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
-        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
-    }else{
-        filter->lumH= sws_getIdentityVec();
-        filter->lumV= sws_getIdentityVec();
-    }
-
-    if (chromaGBlur!=0.0){
-        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
-        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
-    }else{
-        filter->chrH= sws_getIdentityVec();
-        filter->chrV= sws_getIdentityVec();
-    }
-
-    /* all four fields are assigned at this point, so the cleanup path may free them */
-    if (!filter->lumH || !filter->lumV || !filter->chrH || !filter->chrV)
-        goto fail;
-
-    if (chromaSharpen!=0.0){
-        SwsVector *id= sws_getIdentityVec();
-        if (!id)
-            goto fail;
-        sws_scaleVec(filter->chrH, -chromaSharpen);
-        sws_scaleVec(filter->chrV, -chromaSharpen);
-        sws_addVec(filter->chrH, id);
-        sws_addVec(filter->chrV, id);
-        sws_freeVec(id);
-    }
-
-    if (lumaSharpen!=0.0){
-        SwsVector *id= sws_getIdentityVec();
-        if (!id)
-            goto fail;
-        sws_scaleVec(filter->lumH, -lumaSharpen);
-        sws_scaleVec(filter->lumV, -lumaSharpen);
-        sws_addVec(filter->lumH, id);
-        sws_addVec(filter->lumV, id);
-        sws_freeVec(id);
-    }
-
-    /* NOTE: the (int) cast truncates toward zero, so e.g. a shift of -1.0 becomes 0 */
-    if (chromaHShift != 0.0)
-        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
-
-    if (chromaVShift != 0.0)
-        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
-
-    sws_normalizeVec(filter->chrH, 1.0);
-    sws_normalizeVec(filter->chrV, 1.0);
-    sws_normalizeVec(filter->lumH, 1.0);
-    sws_normalizeVec(filter->lumV, 1.0);
-
-    if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
-    if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
-
-    return filter;
-
-fail:
-    /* sws_freeFilter() releases whichever vectors were obtained plus the filter itself */
-    sws_freeFilter(filter);
-    return NULL;
-}
-
-/**
- * Allocates a vector holding a sampled Gaussian, normalized so that its
- * coefficients sum to 1.0.
- *
- * The vector length is (int)(variance*quality + 0.5) forced to be odd, and
- * the curve is centred on the middle coefficient.
- *
- * @return the new vector (release it with sws_freeVec()), or NULL if an
- *         allocation failed
- */
-SwsVector *sws_getGaussianVec(double variance, double quality){
-    const int length= (int)(variance*quality + 0.5) | 1;
-    int i;
-    double *coeff= av_malloc(length*sizeof(double));
-    double middle= (length-1)*0.5;
-    SwsVector *vec= av_malloc(sizeof(SwsVector));
-
-    /* do not touch either buffer unless both allocations succeeded */
-    if (!coeff || !vec){
-        av_free(coeff);
-        av_free(vec);
-        return NULL;
-    }
-
-    vec->coeff= coeff;
-    vec->length= length;
-
-    for (i=0; i<length; i++)
-    {
-        double dist= i-middle;
-        coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
-    }
-
-    /* the constant factor above is irrelevant after this: the sum becomes 1.0 */
-    sws_normalizeVec(vec, 1.0);
-
-    return vec;
-}
-
-/**
- * Allocates a vector with \p length coefficients, all set to \p c.
- *
- * @return the new vector (release it with sws_freeVec()), or NULL if an
- *         allocation failed
- */
-SwsVector *sws_getConstVec(double c, int length){
-    int i;
-    double *coeff= av_malloc(length*sizeof(double));
-    SwsVector *vec= av_malloc(sizeof(SwsVector));
-
-    /* do not touch either buffer unless both allocations succeeded */
-    if (!coeff || !vec){
-        av_free(coeff);
-        av_free(vec);
-        return NULL;
-    }
-
-    vec->coeff= coeff;
-    vec->length= length;
-
-    for (i=0; i<length; i++)
-        coeff[i]= c;
-
-    return vec;
-}
-
-
-/**
- * Returns a newly allocated vector with the single coefficient 1.0,
- * obtained from sws_getConstVec(1.0, 1).
- */
-SwsVector *sws_getIdentityVec(void){
-    return sws_getConstVec(1.0, 1);
-}
-
-/**
- * Returns the sum of all coefficients of \p a, added up in index order.
- */
-double sws_dcVec(SwsVector *a){
-    double total= 0;
-    int k= 0;
-
-    while (k < a->length){
-        total+= a->coeff[k];
-        k++;
-    }
-
-    return total;
-}
-
-/**
- * Multiplies every coefficient of \p a by \p scalar, in place.
- */
-void sws_scaleVec(SwsVector *a, double scalar){
-    int k= 0;
-
-    while (k < a->length){
-        a->coeff[k]= a->coeff[k]*scalar;
-        k++;
-    }
-}
-
-/**
- * Rescales \p a in place by height / (sum of its coefficients).
- */
-void sws_normalizeVec(SwsVector *a, double height){
-    const double factor= height/sws_dcVec(a);
-
-    sws_scaleVec(a, factor);
-}
-
-/**
- * Returns a newly allocated vector holding the discrete convolution of
- * \p a and \p b; its length is a->length + b->length - 1.
- */
-static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
-    const int outLen= a->length + b->length - 1;
-    double *out= av_malloc(outLen*sizeof(double));
-    SwsVector *result= av_malloc(sizeof(SwsVector));
-    int ia, ib, k;
-
-    result->coeff= out;
-    result->length= outLen;
-
-    /* start from an all-zero accumulator */
-    for (k=0; k<outLen; k++)
-        out[k]= 0.0;
-
-    /* every product a[ia]*b[ib] contributes to output tap ia+ib */
-    for (ia=0; ia<a->length; ia++)
-        for (ib=0; ib<b->length; ib++)
-            out[ia+ib]+= a->coeff[ia]*b->coeff[ib];
-
-    return result;
-}
-
-/**
- * Returns a newly allocated vector a + b. The result is as long as the
- * longer input, and each input is placed so that its middle lines up with
- * the middle of the result.
- */
-static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
-    const int outLen= FFMAX(a->length, b->length);
-    double *out= av_malloc(outLen*sizeof(double));
-    SwsVector *result= av_malloc(sizeof(SwsVector));
-    /* index in the result at which coefficient 0 of each input lands */
-    const int aStart= (outLen-1)/2 - (a->length-1)/2;
-    const int bStart= (outLen-1)/2 - (b->length-1)/2;
-    int k;
-
-    result->coeff= out;
-    result->length= outLen;
-
-    for (k=0; k<outLen; k++)
-        out[k]= 0.0;
-
-    for (k=0; k<a->length; k++)
-        out[aStart + k]+= a->coeff[k];
-    for (k=0; k<b->length; k++)
-        out[bStart + k]+= b->coeff[k];
-
-    return result;
-}
-
-/**
- * Returns a newly allocated vector a - b. The result is as long as the
- * longer input, and each input is placed so that its middle lines up with
- * the middle of the result.
- */
-static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
-    const int outLen= FFMAX(a->length, b->length);
-    double *out= av_malloc(outLen*sizeof(double));
-    SwsVector *result= av_malloc(sizeof(SwsVector));
-    /* index in the result at which coefficient 0 of each input lands */
-    const int aStart= (outLen-1)/2 - (a->length-1)/2;
-    const int bStart= (outLen-1)/2 - (b->length-1)/2;
-    int k;
-
-    result->coeff= out;
-    result->length= outLen;
-
-    for (k=0; k<outLen; k++)
-        out[k]= 0.0;
-
-    for (k=0; k<a->length; k++)
-        out[aStart + k]+= a->coeff[k];
-    for (k=0; k<b->length; k++)
-        out[bStart + k]-= b->coeff[k];
-
-    return result;
-}
-
-/**
- * Returns a newly allocated copy of \p a that is 2*|shift| coefficients
- * longer, zero padded, with the original coefficients moved \p shift
- * positions towards lower indices relative to the centre (towards higher
- * indices if \p shift is negative).
- */
-static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
-    const int outLen= a->length + FFABS(shift)*2;
-    double *out= av_malloc(outLen*sizeof(double));
-    SwsVector *result= av_malloc(sizeof(SwsVector));
-    /* index in the result at which coefficient 0 of the input lands */
-    const int start= (outLen-1)/2 - (a->length-1)/2 - shift;
-    int k;
-
-    result->coeff= out;
-    result->length= outLen;
-
-    for (k=0; k<outLen; k++)
-        out[k]= 0.0;
-
-    for (k=0; k<a->length; k++)
-        out[start + k]= a->coeff[k];
-
-    return result;
-}
-
-/**
- * Replaces the contents of \p a with its shifted version
- * (see sws_getShiftedVec()); the old coefficient array is freed.
- */
-void sws_shiftVec(SwsVector *a, int shift){
-    SwsVector *tmp= sws_getShiftedVec(a, shift);
-
-    /* take over the new coefficient array, then drop the empty shell */
-    av_free(a->coeff);
-    a->length= tmp->length;
-    a->coeff= tmp->coeff;
-    av_free(tmp);
-}
-
-/**
- * Replaces the contents of \p a with a + b (see sws_sumVec());
- * the old coefficient array of \p a is freed, \p b is left untouched.
- */
-void sws_addVec(SwsVector *a, SwsVector *b){
-    SwsVector *tmp= sws_sumVec(a, b);
-
-    /* take over the new coefficient array, then drop the empty shell */
-    av_free(a->coeff);
-    a->length= tmp->length;
-    a->coeff= tmp->coeff;
-    av_free(tmp);
-}
-
-/**
- * Replaces the contents of \p a with a - b (see sws_diffVec());
- * the old coefficient array of \p a is freed, \p b is left untouched.
- */
-void sws_subVec(SwsVector *a, SwsVector *b){
-    SwsVector *tmp= sws_diffVec(a, b);
-
-    /* take over the new coefficient array, then drop the empty shell */
-    av_free(a->coeff);
-    a->length= tmp->length;
-    a->coeff= tmp->coeff;
-    av_free(tmp);
-}
-
-/**
- * Replaces the contents of \p a with the convolution of a and b
- * (see sws_getConvVec()); the old coefficient array of \p a is freed,
- * \p b is left untouched.
- */
-void sws_convVec(SwsVector *a, SwsVector *b){
-    SwsVector *tmp= sws_getConvVec(a, b);
-
-    /* take over the new coefficient array, then drop the empty shell */
-    av_free(a->coeff);
-    a->length= tmp->length;
-    a->coeff= tmp->coeff;
-    av_free(tmp);
-}
-
-/**
- * Returns a newly allocated vector with the same length and the same
- * coefficients as \p a.
- */
-SwsVector *sws_cloneVec(SwsVector *a){
-    double *copy= av_malloc(a->length*sizeof(double));
-    SwsVector *result= av_malloc(sizeof(SwsVector));
-    int k= 0;
-
-    result->coeff= copy;
-    result->length= a->length;
-
-    while (k < a->length){
-        copy[k]= a->coeff[k];
-        k++;
-    }
-
-    return result;
-}
-
-/**
- * Logs one line per coefficient of \p a through av_log(): the value
- * printed with "%1.3f", followed by a run of spaces and a '|' marker.
- *
- * The number of spaces is the coefficient's position between min and max
- * scaled to 0..60, where min and max are the extreme coefficients but
- * never above / below 0. If that range is empty (all coefficients are 0),
- * no spaces are printed.
- */
-void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
-    int i;
-    double max=0;
-    double min=0;
-    double range;
-
-    for (i=0; i<a->length; i++){
-        if (a->coeff[i]>max) max= a->coeff[i];
-        if (a->coeff[i]<min) min= a->coeff[i];
-    }
-
-    range= max - min;
-
-    for (i=0; i<a->length; i++)
-    {
-        /* range==0 would make the quotient NaN, and converting NaN to int is undefined */
-        int x= range > 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
-        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
-        for (;x>0; x--) av_log(log_ctx, log_level, " ");
-        av_log(log_ctx, log_level, "|\n");
-    }
-}
-
-#if LIBSWSCALE_VERSION_MAJOR < 1
-/**
- * Compatibility entry point, only compiled while LIBSWSCALE_VERSION_MAJOR < 1.
- * Prints \p a via sws_printVec2() with no log context at AV_LOG_DEBUG level.
- */
-void sws_printVec(SwsVector *a){
-    sws_printVec2(a, NULL, AV_LOG_DEBUG);
-}
-#endif
-
-/**
- * Frees the coefficient array of \p a and then \p a itself.
- * Passing NULL is allowed and does nothing.
- */
-void sws_freeVec(SwsVector *a){
-    if (a){
-        av_freep(&a->coeff);
-        a->length=0;
-        av_free(a);
-    }
-}
-
-/**
- * Frees the four vectors of \p filter and then \p filter itself.
- * Passing NULL is allowed and does nothing; NULL vector fields are skipped.
- */
-void sws_freeFilter(SwsFilter *filter){
-    if (!filter)
-        return;
-
-    /* sws_freeVec() ignores NULL, so unset fields need no extra test */
-    sws_freeVec(filter->lumH);
-    sws_freeVec(filter->lumV);
-    sws_freeVec(filter->chrH);
-    sws_freeVec(filter->chrV);
-    av_free(filter);
-}
-
-
-/**
- * Releases every buffer owned by \p c and then \p c itself.
- * Passing NULL is allowed and does nothing.
- */
-void sws_freeContext(SwsContext *c){
-    int row;
-
-    if (!c)
-        return;
-
-    /* only the first vLumBufSize / vChrBufSize row pointers are freed here */
-    if (c->lumPixBuf){
-        for (row=0; row<c->vLumBufSize; row++)
-            av_freep(&c->lumPixBuf[row]);
-        av_freep(&c->lumPixBuf);
-    }
-    if (c->chrPixBuf){
-        for (row=0; row<c->vChrBufSize; row++)
-            av_freep(&c->chrPixBuf[row]);
-        av_freep(&c->chrPixBuf);
-    }
-
-    /* filter coefficients */
-    av_freep(&c->vLumFilter);
-    av_freep(&c->vChrFilter);
-    av_freep(&c->hLumFilter);
-    av_freep(&c->hChrFilter);
-#if HAVE_ALTIVEC
-    av_freep(&c->vYCoeffsBank);
-    av_freep(&c->vCCoeffsBank);
-#endif
-
-    /* filter position tables */
-    av_freep(&c->vLumFilterPos);
-    av_freep(&c->vChrFilterPos);
-    av_freep(&c->hLumFilterPos);
-    av_freep(&c->hChrFilterPos);
-
-#if ARCH_X86 && CONFIG_GPL
-    /* the code buffers are unmapped when MAP_ANONYMOUS is available, freed otherwise */
-#ifdef MAP_ANONYMOUS
-    if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
-    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
-#else
-    av_free(c->funnyYCode);
-    av_free(c->funnyUVCode);
-#endif
-    c->funnyYCode=NULL;
-    c->funnyUVCode=NULL;
-#endif /* ARCH_X86 && CONFIG_GPL */
-
-    av_freep(&c->lumMmx2Filter);
-    av_freep(&c->chrMmx2Filter);
-    av_freep(&c->lumMmx2FilterPos);
-    av_freep(&c->chrMmx2FilterPos);
-    av_freep(&c->yuvTable);
-
-    av_free(c);
-}
-
-/**
- * Returns \p context if it was created with exactly the given sizes,
- * formats, flags and the two param values; otherwise frees it (if any)
- * and returns a fresh context from sws_getContext().
- * A NULL \p param is treated as {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT}.
- * srcFilter and dstFilter are not part of the comparison.
- */
-struct SwsContext *sws_getCachedContext(struct SwsContext *context,
-                                        int srcW, int srcH, enum PixelFormat srcFormat,
-                                        int dstW, int dstH, enum PixelFormat dstFormat, int flags,
-                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
-{
-    static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};
-
-    if (!param)
-        param = default_param;
-
-    if (context) {
-        const int reusable =
-            context->srcW == srcW && context->srcH == srcH &&
-            context->srcFormat == srcFormat &&
-            context->dstW == dstW && context->dstH == dstH &&
-            context->dstFormat == dstFormat && context->flags == flags &&
-            context->param[0] == param[0] && context->param[1] == param[1];
-
-        if (reusable)
-            return context;
-
-        /* something changed: the old context is of no use any more */
-        sws_freeContext(context);
-    }
-
-    return sws_getContext(srcW, srcH, srcFormat,
-                          dstW, dstH, dstFormat, flags,
-                          srcFilter, dstFilter, param);
-}
-
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
deleted file mode 100644
index 6efd90fcda..0000000000
--- a/libswscale/swscale.h
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_SWSCALE_H
-#define SWSCALE_SWSCALE_H
-
-/**
- * @file libswscale/swscale.h
- * @brief
- *     external api for the swscale stuff
- */
-
-#include "libavutil/avutil.h"
-
-#define LIBSWSCALE_VERSION_MAJOR 0
-#define LIBSWSCALE_VERSION_MINOR 7
-#define LIBSWSCALE_VERSION_MICRO 1
-
-#define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
-                                               LIBSWSCALE_VERSION_MINOR, \
-                                               LIBSWSCALE_VERSION_MICRO)
-#define LIBSWSCALE_VERSION      AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \
-                                           LIBSWSCALE_VERSION_MINOR, \
-                                           LIBSWSCALE_VERSION_MICRO)
-#define LIBSWSCALE_BUILD        LIBSWSCALE_VERSION_INT
-
-#define LIBSWSCALE_IDENT        "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION)
-
-/**
- * Returns the LIBSWSCALE_VERSION_INT constant.
- */
-unsigned swscale_version(void);
-
-/* values for the flags, the stuff on the command line is different */
-#define SWS_FAST_BILINEAR     1
-#define SWS_BILINEAR          2
-#define SWS_BICUBIC           4
-#define SWS_X                 8
-#define SWS_POINT          0x10
-#define SWS_AREA           0x20
-#define SWS_BICUBLIN       0x40
-#define SWS_GAUSS          0x80
-#define SWS_SINC          0x100
-#define SWS_LANCZOS       0x200
-#define SWS_SPLINE        0x400
-
-#define SWS_SRC_V_CHR_DROP_MASK     0x30000
-#define SWS_SRC_V_CHR_DROP_SHIFT    16
-
-#define SWS_PARAM_DEFAULT           123456
-
-#define SWS_PRINT_INFO              0x1000
-
-//the following 3 flags are not completely implemented
-//internal chrominance subsampling info
-#define SWS_FULL_CHR_H_INT    0x2000
-//input subsampling info
-#define SWS_FULL_CHR_H_INP    0x4000
-#define SWS_DIRECT_BGR        0x8000
-#define SWS_ACCURATE_RND      0x40000
-#define SWS_BITEXACT          0x80000
-
-#define SWS_CPU_CAPS_MMX      0x80000000
-#define SWS_CPU_CAPS_MMX2     0x20000000
-#define SWS_CPU_CAPS_3DNOW    0x40000000
-#define SWS_CPU_CAPS_ALTIVEC  0x10000000
-#define SWS_CPU_CAPS_BFIN     0x01000000
-
-#define SWS_MAX_REDUCE_CUTOFF 0.002
-
-#define SWS_CS_ITU709         1
-#define SWS_CS_FCC            4
-#define SWS_CS_ITU601         5
-#define SWS_CS_ITU624         5
-#define SWS_CS_SMPTE170M      5
-#define SWS_CS_SMPTE240M      7
-#define SWS_CS_DEFAULT        5
-
-
-
-// A list of double coefficients together with its length.
-// when used for filters they must have an odd number of elements
-// coeffs cannot be shared between vectors
-// (sws_freeVec() frees the coeff array along with the vector)
-typedef struct {
-    double *coeff;              ///< pointer to the list of coefficients
-    int length;                 ///< number of coefficients in the vector
-} SwsVector;
-
-// Set of four filter vectors, one per plane type and direction.
-// vectors can be shared
-// NOTE(review): sws_freeFilter() frees each of the four pointers separately,
-// so a filter handed to it presumably must not use one vector for two
-// fields - confirm what "shared" is meant to allow.
-typedef struct {
-    SwsVector *lumH;            ///< vector used for the horizontal luma filter
-    SwsVector *lumV;            ///< vector used for the vertical luma filter
-    SwsVector *chrH;            ///< vector used for the horizontal chroma filter
-    SwsVector *chrV;            ///< vector used for the vertical chroma filter
-} SwsFilter;
-
-struct SwsContext;
-
-/**
- * Frees \p swsContext together with all buffers it owns.
- * If \p swsContext is NULL, does nothing.
- */
-void sws_freeContext(struct SwsContext *swsContext);
-
-/**
- * Allocates and returns a SwsContext. You need it to perform
- * scaling/conversion operations using sws_scale().
- *
- * @param srcW the width of the source image
- * @param srcH the height of the source image
- * @param srcFormat the source image format
- * @param dstW the width of the destination image
- * @param dstH the height of the destination image
- * @param dstFormat the destination image format
- * @param flags specify which algorithm and options to use for rescaling
- * @return a pointer to an allocated context, or NULL in case of error
- */
-struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
-                                  SwsFilter *srcFilter, SwsFilter *dstFilter, double *param);
-
-/**
- * Scales the image slice in \p srcSlice and puts the resulting scaled
- * slice in the image in \p dst. A slice is a sequence of consecutive
- * rows in an image.
- *
- * @param context   the scaling context previously created with
- *                  sws_getContext()
- * @param srcSlice  the array containing the pointers to the planes of
- *                  the source slice
- * @param srcStride the array containing the strides for each plane of
- *                  the source image
- * @param srcSliceY the position in the source image of the slice to
- *                  process, that is the number (counted starting from
- *                  zero) in the image of the first row of the slice
- * @param srcSliceH the height of the source slice, that is the number
- *                  of rows in the slice
- * @param dst       the array containing the pointers to the planes of
- *                  the destination image
- * @param dstStride the array containing the strides for each plane of
- *                  the destination image
- * @return          the height of the output slice
- */
-int sws_scale(struct SwsContext *context, uint8_t* srcSlice[], int srcStride[], int srcSliceY,
-              int srcSliceH, uint8_t* dst[], int dstStride[]);
-#if LIBSWSCALE_VERSION_MAJOR < 1
-/**
- * @deprecated Use sws_scale() instead.
- */
-int sws_scale_ordered(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
-                      int srcSliceH, uint8_t* dst[], int dstStride[]) attribute_deprecated;
-#endif
-
-
-int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation);
-int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation);
-
-/**
- * Returns a normalized Gaussian curve used to filter stuff
- * quality=3 is high quality, lower is lower quality.
- */
-SwsVector *sws_getGaussianVec(double variance, double quality);
-
-/**
- * Allocates and returns a vector with \p length coefficients, all
- * with the same value \p c.
- */
-SwsVector *sws_getConstVec(double c, int length);
-
-/**
- * Allocates and returns a vector with just one coefficient, with
- * value 1.0.
- */
-SwsVector *sws_getIdentityVec(void);
-
-/**
- * Scales all the coefficients of \p a by the \p scalar value.
- */
-void sws_scaleVec(SwsVector *a, double scalar);
-
-/**
- * Scales all the coefficients of \p a so that their sum equals
- * \p height.
- */
-void sws_normalizeVec(SwsVector *a, double height);
-/**
- * Replaces \p a with the convolution of \p a and \p b; the length of
- * \p a becomes a->length + b->length - 1. \p b is not modified.
- */
-void sws_convVec(SwsVector *a, SwsVector *b);
-
-/**
- * Replaces \p a with \p a + \p b. Both vectors are aligned on their
- * middle coefficient and the length of \p a becomes the larger of the
- * two lengths. \p b is not modified.
- */
-void sws_addVec(SwsVector *a, SwsVector *b);
-
-/**
- * Replaces \p a with \p a - \p b. Both vectors are aligned on their
- * middle coefficient and the length of \p a becomes the larger of the
- * two lengths. \p b is not modified.
- */
-void sws_subVec(SwsVector *a, SwsVector *b);
-
-/**
- * Shifts the coefficients of \p a by \p shift positions towards lower
- * indices relative to the centre (towards higher indices if \p shift is
- * negative). The vector grows by 2*|shift| zero-padded coefficients.
- */
-void sws_shiftVec(SwsVector *a, int shift);
-
-/**
- * Allocates and returns a clone of the vector \p a, that is a vector
- * with the same coefficients as \p a.
- */
-SwsVector *sws_cloneVec(SwsVector *a);
-
-#if LIBSWSCALE_VERSION_MAJOR < 1
-/**
- * @deprecated Use sws_printVec2() instead.
- */
-attribute_deprecated void sws_printVec(SwsVector *a);
-#endif
-
-/**
- * Prints with av_log() a textual representation of the vector \p a
- * if \p log_level <= av_log_level.
- */
-void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
-
-/**
- * Frees the coefficients of \p a and the vector itself.
- * If \p a is NULL, does nothing.
- */
-void sws_freeVec(SwsVector *a);
-
-/**
- * Allocates and returns a filter built from the given settings.
- *
- * A non-zero blur value selects a Gaussian vector for luma / chroma,
- * otherwise an identity vector is used. A non-zero sharpen value turns
- * the vector into identity - sharpen*vector. The chroma vectors are
- * shifted by the rounded \p chromaHShift / \p chromaVShift, and all four
- * vectors are normalized to a coefficient sum of 1.0.
- *
- * @param verbose if non-zero, the horizontal chroma and luma vectors are
- *                printed at AV_LOG_DEBUG level
- */
-SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
-                                float lumaSharpen, float chromaSharpen,
-                                float chromaHShift, float chromaVShift,
-                                int verbose);
-/**
- * Frees the four vectors of \p filter and the filter itself.
- * If \p filter is NULL, does nothing.
- */
-void sws_freeFilter(SwsFilter *filter);
-
-/**
- * Checks if \p context can be reused, otherwise reallocates a new
- * one.
- *
- * If \p context is NULL, just calls sws_getContext() to get a new
- * context. Otherwise, checks if the parameters are the ones already
- * saved in \p context. If that is the case, returns the current
- * context. Otherwise, frees \p context and gets a new context with
- * the new parameters.
- *
- * Be warned that \p srcFilter and \p dstFilter are not checked, they
- * are assumed to remain the same.
- */
-struct SwsContext *sws_getCachedContext(struct SwsContext *context,
-                                        int srcW, int srcH, enum PixelFormat srcFormat,
-                                        int dstW, int dstH, enum PixelFormat dstFormat, int flags,
-                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param);
-
-#endif /* SWSCALE_SWSCALE_H */
diff --git a/libswscale/swscale_altivec_template.c b/libswscale/swscale_altivec_template.c
deleted file mode 100644
index a008b966e8..0000000000
--- a/libswscale/swscale_altivec_template.c
+++ /dev/null
@@ -1,538 +0,0 @@
-/*
- * AltiVec-enhanced yuv2yuvX
- *
- * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
- * based on the equivalent C code in swscale.c
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#define vzero vec_splat_s32(0)
-
-static inline void
-altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) {
-    register int i;
-    vector unsigned int altivec_vectorShiftInt19 =
-        vec_add(vec_splat_u32(10), vec_splat_u32(9));
-    if ((unsigned long)dest % 16) {
-        /* badly aligned store, we force store alignment */
-        /* and will handle load misalignment on val w/ vec_perm */
-        vector unsigned char perm1;
-        vector signed int v1;
-        for (i = 0 ; (i < dstW) &&
-            (((unsigned long)dest + i) % 16) ; i++) {
-                int t = val[i] >> 19;
-                dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
-        }
-        perm1 = vec_lvsl(i << 2, val);
-        v1 = vec_ld(i << 2, val);
-        for ( ; i < (dstW - 15); i+=16) {
-            int offset = i << 2;
-            vector signed int v2 = vec_ld(offset + 16, val);
-            vector signed int v3 = vec_ld(offset + 32, val);
-            vector signed int v4 = vec_ld(offset + 48, val);
-            vector signed int v5 = vec_ld(offset + 64, val);
-            vector signed int v12 = vec_perm(v1, v2, perm1);
-            vector signed int v23 = vec_perm(v2, v3, perm1);
-            vector signed int v34 = vec_perm(v3, v4, perm1);
-            vector signed int v45 = vec_perm(v4, v5, perm1);
-
-            vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19);
-            vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19);
-            vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19);
-            vector signed int vD = vec_sra(v45, altivec_vectorShiftInt19);
-            vector unsigned short vs1 = vec_packsu(vA, vB);
-            vector unsigned short vs2 = vec_packsu(vC, vD);
-            vector unsigned char vf = vec_packsu(vs1, vs2);
-            vec_st(vf, i, dest);
-            v1 = v5;
-        }
-    } else { // dest is properly aligned, great
-        for (i = 0; i < (dstW - 15); i+=16) {
-            int offset = i << 2;
-            vector signed int v1 = vec_ld(offset, val);
-            vector signed int v2 = vec_ld(offset + 16, val);
-            vector signed int v3 = vec_ld(offset + 32, val);
-            vector signed int v4 = vec_ld(offset + 48, val);
-            vector signed int v5 = vec_sra(v1, altivec_vectorShiftInt19);
-            vector signed int v6 = vec_sra(v2, altivec_vectorShiftInt19);
-            vector signed int v7 = vec_sra(v3, altivec_vectorShiftInt19);
-            vector signed int v8 = vec_sra(v4, altivec_vectorShiftInt19);
-            vector unsigned short vs1 = vec_packsu(v5, v6);
-            vector unsigned short vs2 = vec_packsu(v7, v8);
-            vector unsigned char vf = vec_packsu(vs1, vs2);
-            vec_st(vf, i, dest);
-        }
-    }
-    for ( ; i < dstW ; i++) {
-        int t = val[i] >> 19;
-        dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
-    }
-}
-
-static inline void
-yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                      int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                      uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
-{
-    const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
-    register int i, j;
-    {
-        int __attribute__ ((aligned (16))) val[dstW];
-
-        for (i = 0; i < (dstW -7); i+=4) {
-            vec_st(vini, i << 2, val);
-        }
-        for (; i < dstW; i++) {
-            val[i] = (1 << 18);
-        }
-
-        for (j = 0; j < lumFilterSize; j++) {
-            vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
-            vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter);
-            vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
-            vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
-
-            perm = vec_lvsl(0, lumSrc[j]);
-            l1 = vec_ld(0, lumSrc[j]);
-
-            for (i = 0; i < (dstW - 7); i+=8) {
-                int offset = i << 2;
-                vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]);
-
-                vector signed int v1 = vec_ld(offset, val);
-                vector signed int v2 = vec_ld(offset + 16, val);
-
-                vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7]
-
-                vector signed int i1 = vec_mule(vLumFilter, ls);
-                vector signed int i2 = vec_mulo(vLumFilter, ls);
-
-                vector signed int vf1 = vec_mergeh(i1, i2);
-                vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j]
-
-                vector signed int vo1 = vec_add(v1, vf1);
-                vector signed int vo2 = vec_add(v2, vf2);
-
-                vec_st(vo1, offset, val);
-                vec_st(vo2, offset + 16, val);
-
-                l1 = l2;
-            }
-            for ( ; i < dstW; i++) {
-                val[i] += lumSrc[j][i] * lumFilter[j];
-            }
-        }
-        altivec_packIntArrayToCharArray(val, dest, dstW);
-    }
-    if (uDest != 0) {
-        int  __attribute__ ((aligned (16))) u[chrDstW];
-        int  __attribute__ ((aligned (16))) v[chrDstW];
-
-        for (i = 0; i < (chrDstW -7); i+=4) {
-            vec_st(vini, i << 2, u);
-            vec_st(vini, i << 2, v);
-        }
-        for (; i < chrDstW; i++) {
-            u[i] = (1 << 18);
-            v[i] = (1 << 18);
-        }
-
-        for (j = 0; j < chrFilterSize; j++) {
-            vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter);
-            vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter);
-            vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
-            vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
-
-            perm = vec_lvsl(0, chrSrc[j]);
-            l1 = vec_ld(0, chrSrc[j]);
-            l1_V = vec_ld(2048 << 1, chrSrc[j]);
-
-            for (i = 0; i < (chrDstW - 7); i+=8) {
-                int offset = i << 2;
-                vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
-                vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]);
-
-                vector signed int v1 = vec_ld(offset, u);
-                vector signed int v2 = vec_ld(offset + 16, u);
-                vector signed int v1_V = vec_ld(offset, v);
-                vector signed int v2_V = vec_ld(offset + 16, v);
-
-                vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
-                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055]
-
-                vector signed int i1 = vec_mule(vChrFilter, ls);
-                vector signed int i2 = vec_mulo(vChrFilter, ls);
-                vector signed int i1_V = vec_mule(vChrFilter, ls_V);
-                vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
-
-                vector signed int vf1 = vec_mergeh(i1, i2);
-                vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
-                vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
-                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
-
-                vector signed int vo1 = vec_add(v1, vf1);
-                vector signed int vo2 = vec_add(v2, vf2);
-                vector signed int vo1_V = vec_add(v1_V, vf1_V);
-                vector signed int vo2_V = vec_add(v2_V, vf2_V);
-
-                vec_st(vo1, offset, u);
-                vec_st(vo2, offset + 16, u);
-                vec_st(vo1_V, offset, v);
-                vec_st(vo2_V, offset + 16, v);
-
-                l1 = l2;
-                l1_V = l2_V;
-            }
-            for ( ; i < chrDstW; i++) {
-                u[i] += chrSrc[j][i] * chrFilter[j];
-                v[i] += chrSrc[j][i + 2048] * chrFilter[j];
-            }
-        }
-        altivec_packIntArrayToCharArray(u, uDest, chrDstW);
-        altivec_packIntArrayToCharArray(v, vDest, chrDstW);
-    }
-}
-
-static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, int16_t *filter, int16_t *filterPos, int filterSize) {
-    register int i;
-    int __attribute__ ((aligned (16))) tempo[4];
-
-    if (filterSize % 4) {
-        for (i=0; i<dstW; i++) {
-            register int j;
-            register int srcPos = filterPos[i];
-            register int val = 0;
-            for (j=0; j<filterSize; j++) {
-                val += ((int)src[srcPos + j])*filter[filterSize*i + j];
-            }
-            dst[i] = FFMIN(val>>7, (1<<15)-1);
-        }
-    }
-    else
-    switch (filterSize) {
-    case 4:
-    {
-    for (i=0; i<dstW; i++) {
-        register int srcPos = filterPos[i];
-
-        vector unsigned char src_v0 = vec_ld(srcPos, src);
-        vector unsigned char src_v1, src_vF;
-        vector signed short src_v, filter_v;
-        vector signed int val_vEven, val_s;
-        if ((((int)src + srcPos)% 16) > 12) {
-            src_v1 = vec_ld(srcPos + 16, src);
-        }
-        src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
-
-        src_v = // vec_unpackh sign-extends...
-            (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-        // now put our elements in the even slots
-        src_v = vec_mergeh(src_v, (vector signed short)vzero);
-
-        filter_v = vec_ld(i << 3, filter);
-        // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
-
-        // The neat trick: We only care for half the elements,
-        // high or low depending on (i<<3)%16 (it's 0 or 8 here),
-        // and we're going to use vec_mule, so we choose
-        // carefully how to "unpack" the elements into the even slots.
-        if ((i << 3) % 16)
-            filter_v = vec_mergel(filter_v, (vector signed short)vzero);
-        else
-            filter_v = vec_mergeh(filter_v, (vector signed short)vzero);
-
-        val_vEven = vec_mule(src_v, filter_v);
-        val_s = vec_sums(val_vEven, vzero);
-        vec_st(val_s, 0, tempo);
-        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
-    }
-    }
-    break;
-
-    case 8:
-    {
-    for (i=0; i<dstW; i++) {
-        register int srcPos = filterPos[i];
-
-        vector unsigned char src_v0 = vec_ld(srcPos, src);
-        vector unsigned char src_v1, src_vF;
-        vector signed short src_v, filter_v;
-        vector signed int val_v, val_s;
-        if ((((int)src + srcPos)% 16) > 8) {
-            src_v1 = vec_ld(srcPos + 16, src);
-        }
-        src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
-
-        src_v = // vec_unpackh sign-extends...
-            (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-        filter_v = vec_ld(i << 4, filter);
-        // the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2)
-
-        val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
-        val_s = vec_sums(val_v, vzero);
-        vec_st(val_s, 0, tempo);
-        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
-    }
-    }
-    break;
-
-    case 16:
-    {
-        for (i=0; i<dstW; i++) {
-            register int srcPos = filterPos[i];
-
-            vector unsigned char src_v0 = vec_ld(srcPos, src);
-            vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
-            vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
-
-            vector signed short src_vA = // vec_unpackh sign-extends...
-                (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-            vector signed short src_vB = // vec_unpackh sign-extends...
-                (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
-
-            vector signed short filter_v0 = vec_ld(i << 5, filter);
-            vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
-            // the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2)
-
-            vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
-            vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
-
-            vector signed int val_s = vec_sums(val_v, vzero);
-
-            vec_st(val_s, 0, tempo);
-            dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
-        }
-    }
-    break;
-
-    default:
-    {
-    for (i=0; i<dstW; i++) {
-        register int j;
-        register int srcPos = filterPos[i];
-
-        vector signed int val_s, val_v = (vector signed int)vzero;
-        vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
-        vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter);
-
-        vector unsigned char src_v0 = vec_ld(srcPos, src);
-        vector unsigned char permS = vec_lvsl(srcPos, src);
-
-        for (j = 0 ; j < filterSize - 15; j += 16) {
-            vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
-            vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
-
-            vector signed short src_vA = // vec_unpackh sign-extends...
-                (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-            vector signed short src_vB = // vec_unpackh sign-extends...
-                (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
-
-            vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
-            vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
-            vector signed short filter_v0  = vec_perm(filter_v0R, filter_v1R, permF);
-            vector signed short filter_v1  = vec_perm(filter_v1R, filter_v2R, permF);
-
-            vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
-            val_v = vec_msums(src_vB, filter_v1, val_acc);
-
-            filter_v0R = filter_v2R;
-            src_v0 = src_v1;
-        }
-
-        if (j < filterSize-7) {
-            // loading src_v0 is useless, it's already done above
-            //vector unsigned char src_v0 = vec_ld(srcPos + j, src);
-            vector unsigned char src_v1, src_vF;
-            vector signed short src_v, filter_v1R, filter_v;
-            if ((((int)src + srcPos)% 16) > 8) {
-                src_v1 = vec_ld(srcPos + j + 16, src);
-            }
-            src_vF = vec_perm(src_v0, src_v1, permS);
-
-            src_v = // vec_unpackh sign-extends...
-                (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
-            // loading filter_v0R is useless, it's already done above
-            //vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
-            filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
-            filter_v = vec_perm(filter_v0R, filter_v1R, permF);
-
-            val_v = vec_msums(src_v, filter_v, val_v);
-        }
-
-        val_s = vec_sums(val_v, vzero);
-
-        vec_st(val_s, 0, tempo);
-        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
-    }
-
-    }
-    }
-}
-
-static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                              int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) {
-    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
-    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-    uint8_t *ysrc = src[0];
-    uint8_t *usrc = src[1];
-    uint8_t *vsrc = src[2];
-    const int width = c->srcW;
-    const int height = srcSliceH;
-    const int lumStride = srcStride[0];
-    const int chromStride = srcStride[1];
-    const int dstStride = dstStride_a[0];
-    const vector unsigned char yperm = vec_lvsl(0, ysrc);
-    const int vertLumPerChroma = 2;
-    register unsigned int y;
-
-    if (width&15) {
-        yv12toyuy2(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
-        return srcSliceH;
-    }
-
-    /* This code assumes:
-
-    1) dst is 16 bytes-aligned
-    2) dstStride is a multiple of 16
-    3) width is a multiple of 16
-    4) lum & chrom stride are multiples of 8
-    */
-
-    for (y=0; y<height; y++) {
-        int i;
-        for (i = 0; i < width - 31; i+= 32) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_yA = vec_ld(i, ysrc);
-            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
-            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
-            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
-            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
-            vector unsigned char v_uA = vec_ld(j, usrc);
-            vector unsigned char v_uB = vec_ld(j + 16, usrc);
-            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
-            vector unsigned char v_vA = vec_ld(j, vsrc);
-            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
-            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
-            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
-            vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
-            vec_st(v_yuy2_0, (i << 1), dst);
-            vec_st(v_yuy2_1, (i << 1) + 16, dst);
-            vec_st(v_yuy2_2, (i << 1) + 32, dst);
-            vec_st(v_yuy2_3, (i << 1) + 48, dst);
-        }
-        if (i < width) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_y1 = vec_ld(i, ysrc);
-            vector unsigned char v_u = vec_ld(j, usrc);
-            vector unsigned char v_v = vec_ld(j, vsrc);
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
-            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
-            vec_st(v_yuy2_0, (i << 1), dst);
-            vec_st(v_yuy2_1, (i << 1) + 16, dst);
-        }
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst += dstStride;
-    }
-
-    return srcSliceH;
-}
-
-static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                              int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) {
-    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
-    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
-    uint8_t *ysrc = src[0];
-    uint8_t *usrc = src[1];
-    uint8_t *vsrc = src[2];
-    const int width = c->srcW;
-    const int height = srcSliceH;
-    const int lumStride = srcStride[0];
-    const int chromStride = srcStride[1];
-    const int dstStride = dstStride_a[0];
-    const int vertLumPerChroma = 2;
-    const vector unsigned char yperm = vec_lvsl(0, ysrc);
-    register unsigned int y;
-
-    if (width&15) {
-        yv12touyvy(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
-        return srcSliceH;
-    }
-
-    /* This code assumes:
-
-    1) dst is 16 bytes-aligned
-    2) dstStride is a multiple of 16
-    3) width is a multiple of 16
-    4) lum & chrom stride are multiples of 8
-    */
-
-    for (y=0; y<height; y++) {
-        int i;
-        for (i = 0; i < width - 31; i+= 32) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_yA = vec_ld(i, ysrc);
-            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
-            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
-            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
-            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
-            vector unsigned char v_uA = vec_ld(j, usrc);
-            vector unsigned char v_uB = vec_ld(j + 16, usrc);
-            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
-            vector unsigned char v_vA = vec_ld(j, vsrc);
-            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
-            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
-            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
-            vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
-            vec_st(v_uyvy_0, (i << 1), dst);
-            vec_st(v_uyvy_1, (i << 1) + 16, dst);
-            vec_st(v_uyvy_2, (i << 1) + 32, dst);
-            vec_st(v_uyvy_3, (i << 1) + 48, dst);
-        }
-        if (i < width) {
-            const unsigned int j = i >> 1;
-            vector unsigned char v_y1 = vec_ld(i, ysrc);
-            vector unsigned char v_u = vec_ld(j, usrc);
-            vector unsigned char v_v = vec_ld(j, vsrc);
-            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
-            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
-            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
-            vec_st(v_uyvy_0, (i << 1), dst);
-            vec_st(v_uyvy_1, (i << 1) + 16, dst);
-        }
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
-            usrc += chromStride;
-            vsrc += chromStride;
-        }
-        ysrc += lumStride;
-        dst += dstStride;
-    }
-    return srcSliceH;
-}
diff --git a/libswscale/swscale_avoption.c b/libswscale/swscale_avoption.c
deleted file mode 100644
index 996843df1d..0000000000
--- a/libswscale/swscale_avoption.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/avutil.h"
-#include "libavcodec/opt.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-static const char * sws_context_to_name(void * ptr) {
-    return "swscaler";
-}
-
-#define OFFSET(x) offsetof(SwsContext, x)
-#define DEFAULT 0
-#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
-
-static const AVOption options[] = {
-    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, VE, "sws_flags" },
-    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, SWS_FAST_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, SWS_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, SWS_BICUBIC, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, SWS_X, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, SWS_POINT, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, SWS_AREA, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, SWS_BICUBLIN, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, SWS_GAUSS, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, SWS_SINC, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, SWS_LANCZOS, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { NULL }
-};
-
-const AVClass sws_context_class = { "SWScaler", sws_context_to_name, options };
diff --git a/libswscale/swscale_bfin.c b/libswscale/swscale_bfin.c
deleted file mode 100644
index ed7d9579b6..0000000000
--- a/libswscale/swscale_bfin.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
- *
- * Blackfin software video scaler operations
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <assert.h>
-#include "config.h"
-#include <unistd.h>
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-#ifdef __FDPIC__
-#define L1CODE __attribute__ ((l1_text))
-#else
-#define L1CODE
-#endif
-
-int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                        long width, long height,
-                        long lumStride, long chromStride, long srcStride) L1CODE;
-
-int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                        long width, long height,
-                        long lumStride, long chromStride, long srcStride) L1CODE;
-
-static int uyvytoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY;
-    uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2;
-    uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2;
-    uint8_t *ip   = src[0] + srcStride[0]*srcSliceY;
-    int w         = dstStride[0];
-
-    ff_bfin_uyvytoyv12 (ip, dsty, dstu, dstv, w, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
-
-    return srcSliceH;
-}
-
-static int yuyvtoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY;
-    uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2;
-    uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2;
-    uint8_t *ip   = src[0] + srcStride[0]*srcSliceY;
-    int w         = dstStride[0];
-
-    ff_bfin_yuyvtoyv12 (ip, dsty, dstu, dstv, w, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
-
-    return srcSliceH;
-}
-
-
-void ff_bfin_get_unscaled_swscale (SwsContext *c)
-{
-    SwsFunc swScale = c->swScale;
-    if (c->flags & SWS_CPU_CAPS_BFIN)
-        if (c->dstFormat == PIX_FMT_YUV420P)
-            if (c->srcFormat == PIX_FMT_UYVY422) {
-                av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
-                c->swScale = uyvytoyv12_unscaled;
-            }
-        if (c->dstFormat == PIX_FMT_YUV420P)
-            if (c->srcFormat == PIX_FMT_YUYV422) {
-                av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
-                c->swScale = yuyvtoyv12_unscaled;
-            }
-}
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
deleted file mode 100644
index cdf3754d14..0000000000
--- a/libswscale/swscale_internal.h
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_SWSCALE_INTERNAL_H
-#define SWSCALE_SWSCALE_INTERNAL_H
-
-#include "config.h"
-
-#if HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
-#include "libavutil/avutil.h"
-
-#define STR(s)         AV_TOSTRING(s) //AV_STRINGIFY is too long
-
-#define MAX_FILTER_SIZE 256
-
-#define VOFW 2048
-#define VOF  (VOFW*2)
-
-#ifdef WORDS_BIGENDIAN
-#define ALT32_CORR (-1)
-#else
-#define ALT32_CORR   1
-#endif
-
-#if ARCH_X86_64
-#   define APCK_PTR2 8
-#   define APCK_COEF 16
-#   define APCK_SIZE 24
-#else
-#   define APCK_PTR2 4
-#   define APCK_COEF 8
-#   define APCK_SIZE 16
-#endif
-
-struct SwsContext;
-
-typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
-             int srcSliceH, uint8_t* dst[], int dstStride[]);
-
-/* This struct should be aligned on at least a 32-byte boundary. */
-typedef struct SwsContext{
-    /**
-     * info on struct for av_log
-     */
-    const AVClass *av_class;
-
-    /**
-     * Note that src, dst, srcStride, dstStride will be copied in the
-     * sws_scale() wrapper so they can be freely modified here.
-     */
-    SwsFunc swScale;
-    int srcW, srcH, dstH;
-    int chrSrcW, chrSrcH, chrDstW, chrDstH;
-    int lumXInc, chrXInc;
-    int lumYInc, chrYInc;
-    enum PixelFormat dstFormat, srcFormat;  ///< format 4:2:0 type is always YV12
-    int origDstFormat, origSrcFormat;       ///< format
-    int chrSrcHSubSample, chrSrcVSubSample;
-    int chrIntHSubSample, chrIntVSubSample;
-    int chrDstHSubSample, chrDstVSubSample;
-    int vChrDrop;
-    int sliceDir;
-    double param[2];
-
-    uint32_t pal_yuv[256];
-    uint32_t pal_rgb[256];
-
-    int16_t **lumPixBuf;
-    int16_t **chrPixBuf;
-    int16_t *hLumFilter;
-    int16_t *hLumFilterPos;
-    int16_t *hChrFilter;
-    int16_t *hChrFilterPos;
-    int16_t *vLumFilter;
-    int16_t *vLumFilterPos;
-    int16_t *vChrFilter;
-    int16_t *vChrFilterPos;
-
-    uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
-
-    int hLumFilterSize;
-    int hChrFilterSize;
-    int vLumFilterSize;
-    int vChrFilterSize;
-    int vLumBufSize;
-    int vChrBufSize;
-
-    uint8_t *funnyYCode;
-    uint8_t *funnyUVCode;
-    int32_t *lumMmx2FilterPos;
-    int32_t *chrMmx2FilterPos;
-    int16_t *lumMmx2Filter;
-    int16_t *chrMmx2Filter;
-
-    int canMMX2BeUsed;
-
-    int lastInLumBuf;
-    int lastInChrBuf;
-    int lumBufIndex;
-    int chrBufIndex;
-    int dstY;
-    int flags;
-    void * yuvTable;            // pointer to the yuv->rgb table start so it can be freed()
-    uint8_t * table_rV[256];
-    uint8_t * table_gU[256];
-    int    table_gV[256];
-    uint8_t * table_bU[256];
-
-    //Colorspace stuff
-    int contrast, brightness, saturation;    // for sws_getColorspaceDetails
-    int srcColorspaceTable[4];
-    int dstColorspaceTable[4];
-    int srcRange, dstRange;
-    int yuv2rgb_y_offset;
-    int yuv2rgb_y_coeff;
-    int yuv2rgb_v2r_coeff;
-    int yuv2rgb_v2g_coeff;
-    int yuv2rgb_u2g_coeff;
-    int yuv2rgb_u2b_coeff;
-
-#define RED_DITHER            "0*8"
-#define GREEN_DITHER          "1*8"
-#define BLUE_DITHER           "2*8"
-#define Y_COEFF               "3*8"
-#define VR_COEFF              "4*8"
-#define UB_COEFF              "5*8"
-#define VG_COEFF              "6*8"
-#define UG_COEFF              "7*8"
-#define Y_OFFSET              "8*8"
-#define U_OFFSET              "9*8"
-#define V_OFFSET              "10*8"
-#define LUM_MMX_FILTER_OFFSET "11*8"
-#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET           "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
-#define ESP_OFFSET            "11*8+4*4*256*2+8"
-#define VROUNDER_OFFSET       "11*8+4*4*256*2+16"
-#define U_TEMP                "11*8+4*4*256*2+24"
-#define V_TEMP                "11*8+4*4*256*2+32"
-
-    uint64_t redDither   __attribute__((aligned(8)));
-    uint64_t greenDither __attribute__((aligned(8)));
-    uint64_t blueDither  __attribute__((aligned(8)));
-
-    uint64_t yCoeff      __attribute__((aligned(8)));
-    uint64_t vrCoeff     __attribute__((aligned(8)));
-    uint64_t ubCoeff     __attribute__((aligned(8)));
-    uint64_t vgCoeff     __attribute__((aligned(8)));
-    uint64_t ugCoeff     __attribute__((aligned(8)));
-    uint64_t yOffset     __attribute__((aligned(8)));
-    uint64_t uOffset     __attribute__((aligned(8)));
-    uint64_t vOffset     __attribute__((aligned(8)));
-    int32_t  lumMmxFilter[4*MAX_FILTER_SIZE];
-    int32_t  chrMmxFilter[4*MAX_FILTER_SIZE];
-    int dstW;
-    uint64_t esp          __attribute__((aligned(8)));
-    uint64_t vRounder     __attribute__((aligned(8)));
-    uint64_t u_temp       __attribute__((aligned(8)));
-    uint64_t v_temp       __attribute__((aligned(8)));
-
-#if HAVE_ALTIVEC
-
-  vector signed short   CY;
-  vector signed short   CRV;
-  vector signed short   CBU;
-  vector signed short   CGU;
-  vector signed short   CGV;
-  vector signed short   OY;
-  vector unsigned short CSHIFT;
-  vector signed short   *vYCoeffsBank, *vCCoeffsBank;
-
-#endif
-
-
-#if ARCH_BFIN
-    uint32_t oy           __attribute__((aligned(4)));
-    uint32_t oc           __attribute__((aligned(4)));
-    uint32_t zero         __attribute__((aligned(4)));
-    uint32_t cy           __attribute__((aligned(4)));
-    uint32_t crv          __attribute__((aligned(4)));
-    uint32_t rmask        __attribute__((aligned(4)));
-    uint32_t cbu          __attribute__((aligned(4)));
-    uint32_t bmask        __attribute__((aligned(4)));
-    uint32_t cgu          __attribute__((aligned(4)));
-    uint32_t cgv          __attribute__((aligned(4)));
-    uint32_t gmask        __attribute__((aligned(4)));
-#endif
-
-#if HAVE_VIS
-    uint64_t sparc_coeffs[10] __attribute__((aligned(8)));
-#endif
-
-} SwsContext;
-//FIXME check init (where 0)
-
-SwsFunc sws_yuv2rgb_get_func_ptr (SwsContext *c);
-int sws_yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation);
-
-void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation);
-SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c);
-void altivec_yuv2packedX (SwsContext *c,
-                          int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                          int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                          uint8_t *dest, int dstW, int dstY);
-
-const char *sws_format_name(int format);
-
-//FIXME replace this with something faster
-#define isPlanarYUV(x)  (           \
-           (x)==PIX_FMT_YUV410P     \
-        || (x)==PIX_FMT_YUV420P     \
-        || (x)==PIX_FMT_YUV411P     \
-        || (x)==PIX_FMT_YUV422P     \
-        || (x)==PIX_FMT_YUV444P     \
-        || (x)==PIX_FMT_YUV440P     \
-        || (x)==PIX_FMT_NV12        \
-        || (x)==PIX_FMT_NV21        \
-    )
-#define isYUV(x)        (           \
-           (x)==PIX_FMT_UYVY422     \
-        || (x)==PIX_FMT_YUYV422     \
-        || isPlanarYUV(x)           \
-    )
-#define isGray(x)       (           \
-           (x)==PIX_FMT_GRAY8       \
-        || (x)==PIX_FMT_GRAY16BE    \
-        || (x)==PIX_FMT_GRAY16LE    \
-    )
-#define isGray16(x)     (           \
-           (x)==PIX_FMT_GRAY16BE    \
-        || (x)==PIX_FMT_GRAY16LE    \
-    )
-#define isRGB(x)        (           \
-           (x)==PIX_FMT_RGB32       \
-        || (x)==PIX_FMT_RGB32_1     \
-        || (x)==PIX_FMT_RGB24       \
-        || (x)==PIX_FMT_RGB565      \
-        || (x)==PIX_FMT_RGB555      \
-        || (x)==PIX_FMT_RGB8        \
-        || (x)==PIX_FMT_RGB4        \
-        || (x)==PIX_FMT_RGB4_BYTE   \
-        || (x)==PIX_FMT_MONOBLACK   \
-        || (x)==PIX_FMT_MONOWHITE   \
-    )
-#define isBGR(x)        (           \
-           (x)==PIX_FMT_BGR32       \
-        || (x)==PIX_FMT_BGR32_1     \
-        || (x)==PIX_FMT_BGR24       \
-        || (x)==PIX_FMT_BGR565      \
-        || (x)==PIX_FMT_BGR555      \
-        || (x)==PIX_FMT_BGR8        \
-        || (x)==PIX_FMT_BGR4        \
-        || (x)==PIX_FMT_BGR4_BYTE   \
-        || (x)==PIX_FMT_MONOBLACK   \
-        || (x)==PIX_FMT_MONOWHITE   \
-    )
-#define isALPHA(x)      (           \
-           (x)==PIX_FMT_BGR32       \
-        || (x)==PIX_FMT_BGR32_1     \
-        || (x)==PIX_FMT_RGB32       \
-        || (x)==PIX_FMT_RGB32_1     \
-        || (x)==PIX_FMT_YUVA420P    \
-    )
-
-static inline int fmt_depth(int fmt)
-{
-    switch(fmt) {
-        case PIX_FMT_BGRA:
-        case PIX_FMT_ABGR:
-        case PIX_FMT_RGBA:
-        case PIX_FMT_ARGB:
-            return 32;
-        case PIX_FMT_BGR24:
-        case PIX_FMT_RGB24:
-            return 24;
-        case PIX_FMT_BGR565:
-        case PIX_FMT_RGB565:
-        case PIX_FMT_GRAY16BE:
-        case PIX_FMT_GRAY16LE:
-            return 16;
-        case PIX_FMT_BGR555:
-        case PIX_FMT_RGB555:
-            return 15;
-        case PIX_FMT_BGR8:
-        case PIX_FMT_RGB8:
-            return 8;
-        case PIX_FMT_BGR4:
-        case PIX_FMT_RGB4:
-        case PIX_FMT_BGR4_BYTE:
-        case PIX_FMT_RGB4_BYTE:
-            return 4;
-        case PIX_FMT_MONOBLACK:
-        case PIX_FMT_MONOWHITE:
-            return 1;
-        default:
-            return 0;
-    }
-}
-
-extern const uint64_t ff_dither4[2];
-extern const uint64_t ff_dither8[2];
-
-extern const AVClass sws_context_class;
-
-#endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
deleted file mode 100644
index 3262b6ee85..0000000000
--- a/libswscale/swscale_template.c
+++ /dev/null
@@ -1,3041 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * The C code (not assembly, MMX, ...) of this file can be used
- * under the LGPL license.
- */
-
-#undef REAL_MOVNTQ
-#undef MOVNTQ
-#undef PAVGB
-#undef PREFETCH
-#undef PREFETCHW
-#undef EMMS
-#undef SFENCE
-
-#if HAVE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#if HAVE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#define PREFETCHW "prefetchw"
-#elif HAVE_MMX2
-#define PREFETCH "prefetchnta"
-#define PREFETCHW "prefetcht0"
-#else
-#define PREFETCH  " # nop"
-#define PREFETCHW " # nop"
-#endif
-
-#if HAVE_MMX2
-#define SFENCE "sfence"
-#else
-#define SFENCE " # nop"
-#endif
-
-#if HAVE_MMX2
-#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif HAVE_AMD3DNOW
-#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
-#endif
-
-#if HAVE_MMX2
-#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
-#else
-#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
-#endif
-#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
-
-#if HAVE_ALTIVEC
-#include "swscale_altivec_template.c"
-#endif
-
-#define YSCALEYUV2YV12X(x, offset, dest, width) \
-    __asm__ volatile(\
-    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
-    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
-    "movq                             %%mm3, %%mm4      \n\t"\
-    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-    ASMALIGN(4) /* FIXME Unroll? */\
-    "1:                                                 \n\t"\
-    "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
-    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
-    "add                                $16, %%"REG_d"  \n\t"\
-    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-    "test                         %%"REG_S", %%"REG_S"  \n\t"\
-    "pmulhw                           %%mm0, %%mm2      \n\t"\
-    "pmulhw                           %%mm0, %%mm5      \n\t"\
-    "paddw                            %%mm2, %%mm3      \n\t"\
-    "paddw                            %%mm5, %%mm4      \n\t"\
-    " jnz                                1b             \n\t"\
-    "psraw                               $3, %%mm3      \n\t"\
-    "psraw                               $3, %%mm4      \n\t"\
-    "packuswb                         %%mm4, %%mm3      \n\t"\
-    MOVNTQ(%%mm3, (%1, %%REGa))\
-    "add                                 $8, %%"REG_a"  \n\t"\
-    "cmp                                 %2, %%"REG_a"  \n\t"\
-    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
-    "movq                             %%mm3, %%mm4      \n\t"\
-    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-    "jb                                  1b             \n\t"\
-    :: "r" (&c->redDither),\
-    "r" (dest), "g" (width)\
-    : "%"REG_a, "%"REG_d, "%"REG_S\
-    );
-
-#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
-    __asm__ volatile(\
-    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
-    "pxor                             %%mm4, %%mm4      \n\t"\
-    "pxor                             %%mm5, %%mm5      \n\t"\
-    "pxor                             %%mm6, %%mm6      \n\t"\
-    "pxor                             %%mm7, %%mm7      \n\t"\
-    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-    ASMALIGN(4) \
-    "1:                                                 \n\t"\
-    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
-    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-    "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
-    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
-    "movq                             %%mm0, %%mm3      \n\t"\
-    "punpcklwd                        %%mm1, %%mm0      \n\t"\
-    "punpckhwd                        %%mm1, %%mm3      \n\t"\
-    "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
-    "pmaddwd                          %%mm1, %%mm0      \n\t"\
-    "pmaddwd                          %%mm1, %%mm3      \n\t"\
-    "paddd                            %%mm0, %%mm4      \n\t"\
-    "paddd                            %%mm3, %%mm5      \n\t"\
-    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
-    "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
-    "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
-    "test                         %%"REG_S", %%"REG_S"  \n\t"\
-    "movq                             %%mm2, %%mm0      \n\t"\
-    "punpcklwd                        %%mm3, %%mm2      \n\t"\
-    "punpckhwd                        %%mm3, %%mm0      \n\t"\
-    "pmaddwd                          %%mm1, %%mm2      \n\t"\
-    "pmaddwd                          %%mm1, %%mm0      \n\t"\
-    "paddd                            %%mm2, %%mm6      \n\t"\
-    "paddd                            %%mm0, %%mm7      \n\t"\
-    " jnz                                1b             \n\t"\
-    "psrad                              $16, %%mm4      \n\t"\
-    "psrad                              $16, %%mm5      \n\t"\
-    "psrad                              $16, %%mm6      \n\t"\
-    "psrad                              $16, %%mm7      \n\t"\
-    "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
-    "packssdw                         %%mm5, %%mm4      \n\t"\
-    "packssdw                         %%mm7, %%mm6      \n\t"\
-    "paddw                            %%mm0, %%mm4      \n\t"\
-    "paddw                            %%mm0, %%mm6      \n\t"\
-    "psraw                               $3, %%mm4      \n\t"\
-    "psraw                               $3, %%mm6      \n\t"\
-    "packuswb                         %%mm6, %%mm4      \n\t"\
-    MOVNTQ(%%mm4, (%1, %%REGa))\
-    "add                                 $8, %%"REG_a"  \n\t"\
-    "cmp                                 %2, %%"REG_a"  \n\t"\
-    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
-    "pxor                             %%mm4, %%mm4      \n\t"\
-    "pxor                             %%mm5, %%mm5      \n\t"\
-    "pxor                             %%mm6, %%mm6      \n\t"\
-    "pxor                             %%mm7, %%mm7      \n\t"\
-    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-    "jb                                  1b             \n\t"\
-    :: "r" (&c->redDither),\
-    "r" (dest), "g" (width)\
-    : "%"REG_a, "%"REG_d, "%"REG_S\
-    );
-
-#define YSCALEYUV2YV121 \
-    "mov %2, %%"REG_a"                    \n\t"\
-    ASMALIGN(4) /* FIXME Unroll? */\
-    "1:                                   \n\t"\
-    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
-    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "psraw                 $7, %%mm0      \n\t"\
-    "psraw                 $7, %%mm1      \n\t"\
-    "packuswb           %%mm1, %%mm0      \n\t"\
-    MOVNTQ(%%mm0, (%1, %%REGa))\
-    "add                   $8, %%"REG_a"  \n\t"\
-    "jnc                   1b             \n\t"
-
-#define YSCALEYUV2YV121_ACCURATE \
-    "mov %2, %%"REG_a"                    \n\t"\
-    "pcmpeqw %%mm7, %%mm7                 \n\t"\
-    "psrlw                 $15, %%mm7     \n\t"\
-    "psllw                  $6, %%mm7     \n\t"\
-    ASMALIGN(4) /* FIXME Unroll? */\
-    "1:                                   \n\t"\
-    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
-    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
-    "paddsw             %%mm7, %%mm0      \n\t"\
-    "paddsw             %%mm7, %%mm1      \n\t"\
-    "psraw                 $7, %%mm0      \n\t"\
-    "psraw                 $7, %%mm1      \n\t"\
-    "packuswb           %%mm1, %%mm0      \n\t"\
-    MOVNTQ(%%mm0, (%1, %%REGa))\
-    "add                   $8, %%"REG_a"  \n\t"\
-    "jnc                   1b             \n\t"
-
-/*
-    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
-       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
-       "r" (dest), "m" (dstW),
-       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
-    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
-*/
-#define YSCALEYUV2PACKEDX_UV \
-    __asm__ volatile(\
-    "xor                   %%"REG_a", %%"REG_a"     \n\t"\
-    ASMALIGN(4)\
-    "nop                                            \n\t"\
-    "1:                                             \n\t"\
-    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
-    "movq                      %%mm3, %%mm4         \n\t"\
-    ASMALIGN(4)\
-    "2:                                             \n\t"\
-    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
-    "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
-    "add                         $16, %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pmulhw                    %%mm0, %%mm2         \n\t"\
-    "pmulhw                    %%mm0, %%mm5         \n\t"\
-    "paddw                     %%mm2, %%mm3         \n\t"\
-    "paddw                     %%mm5, %%mm4         \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    " jnz                         2b                \n\t"\
-
-#define YSCALEYUV2PACKEDX_YA(offset) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), %%mm1         \n\t"\
-    "movq                      %%mm1, %%mm7         \n\t"\
-    ASMALIGN(4)\
-    "2:                                             \n\t"\
-    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5         \n\t" /* Y2srcData */\
-    "add                         $16, %%"REG_d"            \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pmulhw                    %%mm0, %%mm2         \n\t"\
-    "pmulhw                    %%mm0, %%mm5         \n\t"\
-    "paddw                     %%mm2, %%mm1         \n\t"\
-    "paddw                     %%mm5, %%mm7         \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    " jnz                         2b                \n\t"\
-
-#define YSCALEYUV2PACKEDX \
-    YSCALEYUV2PACKEDX_UV \
-    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET) \
-
-#define YSCALEYUV2PACKEDX_END                 \
-    :: "r" (&c->redDither),                   \
-        "m" (dummy), "m" (dummy), "m" (dummy),\
-        "r" (dest), "m" (dstW)                \
-    : "%"REG_a, "%"REG_d, "%"REG_S            \
-    );
-
-#define YSCALEYUV2PACKEDX_ACCURATE_UV \
-    __asm__ volatile(\
-    "xor %%"REG_a", %%"REG_a"                       \n\t"\
-    ASMALIGN(4)\
-    "nop                                            \n\t"\
-    "1:                                             \n\t"\
-    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pxor                      %%mm4, %%mm4         \n\t"\
-    "pxor                      %%mm5, %%mm5         \n\t"\
-    "pxor                      %%mm6, %%mm6         \n\t"\
-    "pxor                      %%mm7, %%mm7         \n\t"\
-    ASMALIGN(4)\
-    "2:                                             \n\t"\
-    "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
-    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
-    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
-    "movq                      %%mm0, %%mm3         \n\t"\
-    "punpcklwd                 %%mm1, %%mm0         \n\t"\
-    "punpckhwd                 %%mm1, %%mm3         \n\t"\
-    "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
-    "pmaddwd                   %%mm1, %%mm0         \n\t"\
-    "pmaddwd                   %%mm1, %%mm3         \n\t"\
-    "paddd                     %%mm0, %%mm4         \n\t"\
-    "paddd                     %%mm3, %%mm5         \n\t"\
-    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
-    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    "movq                      %%mm2, %%mm0         \n\t"\
-    "punpcklwd                 %%mm3, %%mm2         \n\t"\
-    "punpckhwd                 %%mm3, %%mm0         \n\t"\
-    "pmaddwd                   %%mm1, %%mm2         \n\t"\
-    "pmaddwd                   %%mm1, %%mm0         \n\t"\
-    "paddd                     %%mm2, %%mm6         \n\t"\
-    "paddd                     %%mm0, %%mm7         \n\t"\
-    " jnz                         2b                \n\t"\
-    "psrad                       $16, %%mm4         \n\t"\
-    "psrad                       $16, %%mm5         \n\t"\
-    "psrad                       $16, %%mm6         \n\t"\
-    "psrad                       $16, %%mm7         \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
-    "packssdw                  %%mm5, %%mm4         \n\t"\
-    "packssdw                  %%mm7, %%mm6         \n\t"\
-    "paddw                     %%mm0, %%mm4         \n\t"\
-    "paddw                     %%mm0, %%mm6         \n\t"\
-    "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
-    "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
-
-#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
-    "pxor                      %%mm1, %%mm1         \n\t"\
-    "pxor                      %%mm5, %%mm5         \n\t"\
-    "pxor                      %%mm7, %%mm7         \n\t"\
-    "pxor                      %%mm6, %%mm6         \n\t"\
-    ASMALIGN(4)\
-    "2:                                             \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
-    "movq                      %%mm0, %%mm3         \n\t"\
-    "punpcklwd                 %%mm4, %%mm0         \n\t"\
-    "punpckhwd                 %%mm4, %%mm3         \n\t"\
-    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
-    "pmaddwd                   %%mm4, %%mm0         \n\t"\
-    "pmaddwd                   %%mm4, %%mm3         \n\t"\
-    "paddd                     %%mm0, %%mm1         \n\t"\
-    "paddd                     %%mm3, %%mm5         \n\t"\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
-    "movq                      %%mm2, %%mm0         \n\t"\
-    "punpcklwd                 %%mm3, %%mm2         \n\t"\
-    "punpckhwd                 %%mm3, %%mm0         \n\t"\
-    "pmaddwd                   %%mm4, %%mm2         \n\t"\
-    "pmaddwd                   %%mm4, %%mm0         \n\t"\
-    "paddd                     %%mm2, %%mm7         \n\t"\
-    "paddd                     %%mm0, %%mm6         \n\t"\
-    " jnz                         2b                \n\t"\
-    "psrad                       $16, %%mm1         \n\t"\
-    "psrad                       $16, %%mm5         \n\t"\
-    "psrad                       $16, %%mm7         \n\t"\
-    "psrad                       $16, %%mm6         \n\t"\
-    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
-    "packssdw                  %%mm5, %%mm1         \n\t"\
-    "packssdw                  %%mm6, %%mm7         \n\t"\
-    "paddw                     %%mm0, %%mm1         \n\t"\
-    "paddw                     %%mm0, %%mm7         \n\t"\
-    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
-    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
-
-#define YSCALEYUV2PACKEDX_ACCURATE \
-    YSCALEYUV2PACKEDX_ACCURATE_UV \
-    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
-
-#define YSCALEYUV2RGBX \
-    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
-    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
-    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
-    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
-/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
-    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
-    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
-    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
-/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw           %%mm3, %%mm4       \n\t"\
-    "movq            %%mm2, %%mm0       \n\t"\
-    "movq            %%mm5, %%mm6       \n\t"\
-    "movq            %%mm4, %%mm3       \n\t"\
-    "punpcklwd       %%mm2, %%mm2       \n\t"\
-    "punpcklwd       %%mm5, %%mm5       \n\t"\
-    "punpcklwd       %%mm4, %%mm4       \n\t"\
-    "paddw           %%mm1, %%mm2       \n\t"\
-    "paddw           %%mm1, %%mm5       \n\t"\
-    "paddw           %%mm1, %%mm4       \n\t"\
-    "punpckhwd       %%mm0, %%mm0       \n\t"\
-    "punpckhwd       %%mm6, %%mm6       \n\t"\
-    "punpckhwd       %%mm3, %%mm3       \n\t"\
-    "paddw           %%mm7, %%mm0       \n\t"\
-    "paddw           %%mm7, %%mm6       \n\t"\
-    "paddw           %%mm7, %%mm3       \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb        %%mm0, %%mm2       \n\t"\
-    "packuswb        %%mm6, %%mm5       \n\t"\
-    "packuswb        %%mm3, %%mm4       \n\t"\
-
-#define REAL_YSCALEYUV2PACKED(index, c) \
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
-    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
-    "psraw                $3, %%mm0                           \n\t"\
-    "psraw                $3, %%mm1                           \n\t"\
-    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
-    "xor            "#index", "#index"                        \n\t"\
-    ASMALIGN(4)\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $7, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $7, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $7, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $7, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
-
-#define REAL_YSCALEYUV2RGB_UV(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ASMALIGN(4)\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
-    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
-    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
-    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
-    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-
-#define REAL_YSCALEYUV2RGB_YA(index, c) \
-    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
-    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
-    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
-    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
-
-#define REAL_YSCALEYUV2RGB_COEFF(c) \
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB_YA(index, c) REAL_YSCALEYUV2RGB_YA(index, c)
-
-#define YSCALEYUV2RGB(index, c) \
-    REAL_YSCALEYUV2RGB_UV(index, c) \
-    REAL_YSCALEYUV2RGB_YA(index, c) \
-    REAL_YSCALEYUV2RGB_COEFF(c)
-
-#define REAL_YSCALEYUV2PACKED1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ASMALIGN(4)\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $7, %%mm3     \n\t" \
-    "psraw                $7, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t" \
-
-#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
-
-#define REAL_YSCALEYUV2RGB1(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ASMALIGN(4)\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
-    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
-
-#define REAL_YSCALEYUV2PACKED1b(index, c) \
-    "xor "#index", "#index"             \n\t"\
-    ASMALIGN(4)\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $8, %%mm3     \n\t" \
-    "psrlw                $8, %%mm4     \n\t" \
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $7, %%mm1     \n\t" \
-    "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
-
-// do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c) \
-    "xor            "#index", "#index"  \n\t"\
-    ASMALIGN(4)\
-    "1:                                 \n\t"\
-    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
-    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
-    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
-    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
-    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
-    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
-    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
-    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
-    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
-    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
-    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
-    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
-    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
-    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
-    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
-    "psraw                $4, %%mm1     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "psraw                $4, %%mm7     \n\t" /* buf0[eax] - buf1[eax] >>4*/\
-    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
-    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
-    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
-    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
-    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
-    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
-    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
-    "paddw             %%mm3, %%mm4     \n\t"\
-    "movq              %%mm2, %%mm0     \n\t"\
-    "movq              %%mm5, %%mm6     \n\t"\
-    "movq              %%mm4, %%mm3     \n\t"\
-    "punpcklwd         %%mm2, %%mm2     \n\t"\
-    "punpcklwd         %%mm5, %%mm5     \n\t"\
-    "punpcklwd         %%mm4, %%mm4     \n\t"\
-    "paddw             %%mm1, %%mm2     \n\t"\
-    "paddw             %%mm1, %%mm5     \n\t"\
-    "paddw             %%mm1, %%mm4     \n\t"\
-    "punpckhwd         %%mm0, %%mm0     \n\t"\
-    "punpckhwd         %%mm6, %%mm6     \n\t"\
-    "punpckhwd         %%mm3, %%mm3     \n\t"\
-    "paddw             %%mm7, %%mm0     \n\t"\
-    "paddw             %%mm7, %%mm6     \n\t"\
-    "paddw             %%mm7, %%mm3     \n\t"\
-    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
-    "packuswb          %%mm0, %%mm2     \n\t"\
-    "packuswb          %%mm6, %%mm5     \n\t"\
-    "packuswb          %%mm3, %%mm4     \n\t"\
-
-#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
-
-#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
-    "movq       "#b", "#q2"     \n\t" /* B */\
-    "movq       "#r", "#t"      \n\t" /* R */\
-    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
-    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
-    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
-    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
-    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
-    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
-    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
-\
-    MOVNTQ(   q0,   (dst, index, 4))\
-    MOVNTQ(    b,  8(dst, index, 4))\
-    MOVNTQ(   q2, 16(dst, index, 4))\
-    MOVNTQ(   q3, 24(dst, index, 4))\
-\
-    "add      $8, "#index"      \n\t"\
-    "cmp "#dstw", "#index"      \n\t"\
-    " jb      1b                \n\t"
-#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
-
-#define REAL_WRITERGB16(dst, dstw, index) \
-    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
-    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
-    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
-    "psrlq           $3, %%mm2  \n\t"\
-\
-    "movq         %%mm2, %%mm1  \n\t"\
-    "movq         %%mm4, %%mm3  \n\t"\
-\
-    "punpcklbw    %%mm7, %%mm3  \n\t"\
-    "punpcklbw    %%mm5, %%mm2  \n\t"\
-    "punpckhbw    %%mm7, %%mm4  \n\t"\
-    "punpckhbw    %%mm5, %%mm1  \n\t"\
-\
-    "psllq           $3, %%mm3  \n\t"\
-    "psllq           $3, %%mm4  \n\t"\
-\
-    "por          %%mm3, %%mm2  \n\t"\
-    "por          %%mm4, %%mm1  \n\t"\
-\
-    MOVNTQ(%%mm2,  (dst, index, 2))\
-    MOVNTQ(%%mm1, 8(dst, index, 2))\
-\
-    "add             $8, "#index"   \n\t"\
-    "cmp        "#dstw", "#index"   \n\t"\
-    " jb             1b             \n\t"
-#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
-
-#define REAL_WRITERGB15(dst, dstw, index) \
-    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
-    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
-    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
-    "psrlq           $3, %%mm2  \n\t"\
-    "psrlq           $1, %%mm5  \n\t"\
-\
-    "movq         %%mm2, %%mm1  \n\t"\
-    "movq         %%mm4, %%mm3  \n\t"\
-\
-    "punpcklbw    %%mm7, %%mm3  \n\t"\
-    "punpcklbw    %%mm5, %%mm2  \n\t"\
-    "punpckhbw    %%mm7, %%mm4  \n\t"\
-    "punpckhbw    %%mm5, %%mm1  \n\t"\
-\
-    "psllq           $2, %%mm3  \n\t"\
-    "psllq           $2, %%mm4  \n\t"\
-\
-    "por          %%mm3, %%mm2  \n\t"\
-    "por          %%mm4, %%mm1  \n\t"\
-\
-    MOVNTQ(%%mm2,  (dst, index, 2))\
-    MOVNTQ(%%mm1, 8(dst, index, 2))\
-\
-    "add             $8, "#index"   \n\t"\
-    "cmp        "#dstw", "#index"   \n\t"\
-    " jb             1b             \n\t"
-#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
-
-#define WRITEBGR24OLD(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq      %%mm2, %%mm1             \n\t" /* B */\
-    "movq      %%mm5, %%mm6             \n\t" /* R */\
-    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
-    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
-    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
-    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
-    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
-    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
-    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
-\
-    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
-    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
-    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
-    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
-    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
-    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
-    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
-    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
-\
-    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
-    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
-    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
-    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
-    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
-    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
-    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
-    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
-    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
-    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
-    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
-    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
-    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
-\
-    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
-    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
-    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
-    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
-    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
-    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
-    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
-    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
-\
-    MOVNTQ(%%mm0,   (dst))\
-    MOVNTQ(%%mm2,  8(dst))\
-    MOVNTQ(%%mm3, 16(dst))\
-    "add         $24, "#dst"            \n\t"\
-\
-    "add          $8, "#index"          \n\t"\
-    "cmp     "#dstw", "#index"          \n\t"\
-    " jb          1b                    \n\t"
-
-#define WRITEBGR24MMX(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq      %%mm2, %%mm1     \n\t" /* B */\
-    "movq      %%mm5, %%mm6     \n\t" /* R */\
-    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
-    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
-    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
-    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
-    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
-    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
-    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
-    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
-    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
-    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
-\
-    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
-    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
-    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
-    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
-\
-    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
-    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
-    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
-    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
-\
-    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
-    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
-    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
-    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
-\
-    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
-    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
-    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
-    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
-    MOVNTQ(%%mm0, (dst))\
-\
-    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
-    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
-    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
-    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
-    MOVNTQ(%%mm6, 8(dst))\
-\
-    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
-    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
-    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
-    MOVNTQ(%%mm5, 16(dst))\
-\
-    "add         $24, "#dst"    \n\t"\
-\
-    "add          $8, "#index"  \n\t"\
-    "cmp     "#dstw", "#index"  \n\t"\
-    " jb          1b            \n\t"
-
-#define WRITEBGR24MMX2(dst, dstw, index) \
-    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
-    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
-    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
-    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
-    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
-    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
-\
-    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
-    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
-    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
-\
-    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
-    "por    %%mm1, %%mm6        \n\t"\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, (dst))\
-\
-    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
-    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
-    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
-    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
-\
-    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
-    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
-    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
-\
-    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, 8(dst))\
-\
-    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B6 B7 */\
-    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
-    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
-\
-    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
-    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
-    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
-\
-    "por    %%mm1, %%mm3        \n\t"\
-    "por    %%mm3, %%mm6        \n\t"\
-    MOVNTQ(%%mm6, 16(dst))\
-\
-    "add      $24, "#dst"       \n\t"\
-\
-    "add       $8, "#index"     \n\t"\
-    "cmp  "#dstw", "#index"     \n\t"\
-    " jb       1b               \n\t"
-
-#if HAVE_MMX2
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
-#else
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
-#endif
-
-#define REAL_WRITEYUY2(dst, dstw, index) \
-    "packuswb  %%mm3, %%mm3     \n\t"\
-    "packuswb  %%mm4, %%mm4     \n\t"\
-    "packuswb  %%mm7, %%mm1     \n\t"\
-    "punpcklbw %%mm4, %%mm3     \n\t"\
-    "movq      %%mm1, %%mm7     \n\t"\
-    "punpcklbw %%mm3, %%mm1     \n\t"\
-    "punpckhbw %%mm3, %%mm7     \n\t"\
-\
-    MOVNTQ(%%mm1, (dst, index, 2))\
-    MOVNTQ(%%mm7, 8(dst, index, 2))\
-\
-    "add          $8, "#index"  \n\t"\
-    "cmp     "#dstw", "#index"  \n\t"\
-    " jb          1b            \n\t"
-#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
-
-
-static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
-{
-#if HAVE_MMX
-    if(!(c->flags & SWS_BITEXACT)){
-        if (c->flags & SWS_ACCURATE_RND){
-            if (uDest){
-                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-
-            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        }else{
-            if (uDest){
-                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
-                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
-            }
-
-            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
-        }
-        return;
-    }
-#endif
-#if HAVE_ALTIVEC
-yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
-                      chrFilter, chrSrc, chrFilterSize,
-                      dest, uDest, vDest, dstW, chrDstW);
-#else //HAVE_ALTIVEC
-yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
-            chrFilter, chrSrc, chrFilterSize,
-            dest, uDest, vDest, dstW, chrDstW);
-#endif //!HAVE_ALTIVEC
-}
-
-static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
-{
-yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
-             chrFilter, chrSrc, chrFilterSize,
-             dest, uDest, dstW, chrDstW, dstFormat);
-}
-
-static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
-{
-    int i;
-#if HAVE_MMX
-    if(!(c->flags & SWS_BITEXACT)){
-        long p= uDest ? 3 : 1;
-        uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
-        uint8_t *dst[3]= {dest, uDest, vDest};
-        long counter[3] = {dstW, chrDstW, chrDstW};
-
-        if (c->flags & SWS_ACCURATE_RND){
-            while(p--){
-                __asm__ volatile(
-                    YSCALEYUV2YV121_ACCURATE
-                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                    "g" (-counter[p])
-                    : "%"REG_a
-                );
-            }
-        }else{
-            while(p--){
-                __asm__ volatile(
-                    YSCALEYUV2YV121
-                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
-                    "g" (-counter[p])
-                    : "%"REG_a
-                );
-            }
-        }
-        return;
-    }
-#endif
-    for (i=0; i<dstW; i++)
-    {
-        int val= (lumSrc[i]+64)>>7;
-
-        if (val&256){
-            if (val<0) val=0;
-            else       val=255;
-        }
-
-        dest[i]= val;
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++)
-        {
-            int u=(chrSrc[i       ]+64)>>7;
-            int v=(chrSrc[i + VOFW]+64)>>7;
-
-            if ((u|v)&256){
-                if (u<0)        u=0;
-                else if (u>255) u=255;
-                if (v<0)        v=0;
-                else if (v>255) v=255;
-            }
-
-            uDest[i]= u;
-            vDest[i]= v;
-        }
-}
-
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                                       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                       uint8_t *dest, long dstW, long dstY)
-{
-#if HAVE_MMX
-    long dummy=0;
-    if(!(c->flags & SWS_BITEXACT)){
-        if (c->flags & SWS_ACCURATE_RND){
-            switch(c->dstFormat){
-            case PIX_FMT_RGB32:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pcmpeqd %%mm7, %%mm7 \n\t"
-                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_BGR24:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
-                "add %4, %%"REG_c"                        \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest), "m" (dstW)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
-                return;
-            case PIX_FMT_RGB555:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
-                YSCALEYUV2PACKEDX_ACCURATE
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
-                YSCALEYUV2PACKEDX_ACCURATE
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            }
-        }else{
-            switch(c->dstFormat)
-            {
-            case PIX_FMT_RGB32:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pcmpeqd %%mm7, %%mm7 \n\t"
-                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_BGR24:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
-                "add                        %4, %%"REG_c"   \n\t"
-                WRITEBGR24(%%REGc, %5, %%REGa)
-
-                :: "r" (&c->redDither),
-                "m" (dummy), "m" (dummy), "m" (dummy),
-                "r" (dest),  "m" (dstW)
-                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
-                );
-                return;
-            case PIX_FMT_RGB555:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB15(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_RGB565:
-                YSCALEYUV2PACKEDX
-                YSCALEYUV2RGBX
-                "pxor %%mm7, %%mm7 \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
-                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
-                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
-#endif
-
-                WRITERGB16(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            case PIX_FMT_YUYV422:
-                YSCALEYUV2PACKEDX
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-
-                "psraw $3, %%mm3    \n\t"
-                "psraw $3, %%mm4    \n\t"
-                "psraw $3, %%mm1    \n\t"
-                "psraw $3, %%mm7    \n\t"
-                WRITEYUY2(%4, %5, %%REGa)
-                YSCALEYUV2PACKEDX_END
-                return;
-            }
-        }
-    }
-#endif /* HAVE_MMX */
-#if HAVE_ALTIVEC
-    /* The following list of supported dstFormat values should
-       match what's found in the body of altivec_yuv2packedX() */
-    if (!(c->flags & SWS_BITEXACT) &&
-       (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
-        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
-            altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
-                                 chrFilter, chrSrc, chrFilterSize,
-                                 dest, dstW, dstY);
-    else
-#endif
-        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
-                       chrFilter, chrSrc, chrFilterSize,
-                       dest, dstW, dstY);
-}
-
-/**
- * vertical bilinear scale YV12 to RGB
- */
-static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
-                          uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
-{
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
-#if HAVE_MMX
-    if(!(c->flags & SWS_BITEXACT)){
-        switch(c->dstFormat)
-        {
-            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
-            case PIX_FMT_RGB32:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pcmpeqd %%mm7, %%mm7                   \n\t"
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov %4, %%"REG_b"                        \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2PACKED(%%REGBP, %5)
-                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            default: break;
-        }
-    }
-#endif //HAVE_MMX
-YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
-                          uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
-{
-    const int yalpha1=0;
-    int i;
-
-    uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
-    if (flags&SWS_FULL_CHR_H_INT)
-    {
-        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
-        return;
-    }
-
-#if HAVE_MMX
-    if(!(flags & SWS_BITEXACT)){
-        if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
-        {
-            switch(dstFormat)
-            {
-            case PIX_FMT_RGB32:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                "pcmpeqd %%mm7, %%mm7                   \n\t"
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2PACKED1(%%REGBP, %5)
-                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            }
-        }
-        else
-        {
-            switch(dstFormat)
-            {
-            case PIX_FMT_RGB32:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                "pcmpeqd %%mm7, %%mm7                   \n\t"
-                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_BGR24:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB555:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_RGB565:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                "pxor    %%mm7, %%mm7                   \n\t"
-                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
-                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
-                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
-#endif
-
-                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            case PIX_FMT_YUYV422:
-                __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2PACKED1b(%%REGBP, %5)
-                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
-
-                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
-                "a" (&c->redDither)
-                );
-                return;
-            }
-        }
-    }
-#endif /* HAVE_MMX */
-    if (uvalpha < 2048)
-    {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    }else{
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
-    }
-}
-
-//FIXME yuy2* can read up to 7 samples too much
-
-static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    __asm__ volatile(
-    "movq "MANGLE(bm01010101)", %%mm2           \n\t"
-    "mov                    %0, %%"REG_a"       \n\t"
-    "1:                                         \n\t"
-    "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
-    "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
-    "pand                %%mm2, %%mm0           \n\t"
-    "pand                %%mm2, %%mm1           \n\t"
-    "packuswb            %%mm1, %%mm0           \n\t"
-    "movq                %%mm0, (%2, %%"REG_a") \n\t"
-    "add                    $8, %%"REG_a"       \n\t"
-    " js                    1b                  \n\t"
-    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
-    : "%"REG_a
-    );
-#else
-    int i;
-    for (i=0; i<width; i++)
-        dst[i]= src[2*i];
-#endif
-}
-
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    __asm__ volatile(
-    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
-    "mov                    %0, %%"REG_a"       \n\t"
-    "1:                                         \n\t"
-    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
-    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
-    "psrlw                  $8, %%mm0           \n\t"
-    "psrlw                  $8, %%mm1           \n\t"
-    "packuswb            %%mm1, %%mm0           \n\t"
-    "movq                %%mm0, %%mm1           \n\t"
-    "psrlw                  $8, %%mm0           \n\t"
-    "pand                %%mm4, %%mm1           \n\t"
-    "packuswb            %%mm0, %%mm0           \n\t"
-    "packuswb            %%mm1, %%mm1           \n\t"
-    "movd                %%mm0, (%3, %%"REG_a") \n\t"
-    "movd                %%mm1, (%2, %%"REG_a") \n\t"
-    "add                    $4, %%"REG_a"       \n\t"
-    " js                    1b                  \n\t"
-    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
-    : "%"REG_a
-    );
-#else
-    int i;
-    for (i=0; i<width; i++)
-    {
-        dstU[i]= src1[4*i + 1];
-        dstV[i]= src1[4*i + 3];
-    }
-#endif
-    assert(src1 == src2);
-}
-
-/* This is almost identical to the previous, end exists only because
- * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    __asm__ volatile(
-    "mov                  %0, %%"REG_a"         \n\t"
-    "1:                                         \n\t"
-    "movq  (%1, %%"REG_a",2), %%mm0             \n\t"
-    "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
-    "psrlw                $8, %%mm0             \n\t"
-    "psrlw                $8, %%mm1             \n\t"
-    "packuswb          %%mm1, %%mm0             \n\t"
-    "movq              %%mm0, (%2, %%"REG_a")   \n\t"
-    "add                  $8, %%"REG_a"         \n\t"
-    " js                  1b                    \n\t"
-    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
-    : "%"REG_a
-    );
-#else
-    int i;
-    for (i=0; i<width; i++)
-        dst[i]= src[2*i+1];
-#endif
-}
-
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    __asm__ volatile(
-    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
-    "mov                    %0, %%"REG_a"       \n\t"
-    "1:                                         \n\t"
-    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
-    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
-    "pand                %%mm4, %%mm0           \n\t"
-    "pand                %%mm4, %%mm1           \n\t"
-    "packuswb            %%mm1, %%mm0           \n\t"
-    "movq                %%mm0, %%mm1           \n\t"
-    "psrlw                  $8, %%mm0           \n\t"
-    "pand                %%mm4, %%mm1           \n\t"
-    "packuswb            %%mm0, %%mm0           \n\t"
-    "packuswb            %%mm1, %%mm1           \n\t"
-    "movd                %%mm0, (%3, %%"REG_a") \n\t"
-    "movd                %%mm1, (%2, %%"REG_a") \n\t"
-    "add                    $4, %%"REG_a"       \n\t"
-    " js                    1b                  \n\t"
-    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
-    : "%"REG_a
-    );
-#else
-    int i;
-    for (i=0; i<width; i++)
-    {
-        dstU[i]= src1[4*i + 0];
-        dstV[i]= src1[4*i + 2];
-    }
-#endif
-    assert(src1 == src2);
-}
-
-#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++)\
-    {\
-        int b= (((type*)src)[i]>>shb)&maskb;\
-        int g= (((type*)src)[i]>>shg)&maskg;\
-        int r= (((type*)src)[i]>>shr)&maskr;\
-\
-        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
-    }\
-}
-
-BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
-BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
-
-#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
-static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++)\
-    {\
-        int b= (((type*)src)[i]&maskb)>>shb;\
-        int g= (((type*)src)[i]&maskg)>>shg;\
-        int r= (((type*)src)[i]&maskr)>>shr;\
-\
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
-    }\
-}\
-static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++)\
-    {\
-        int pix0= ((type*)src)[2*i+0];\
-        int pix1= ((type*)src)[2*i+1];\
-        int g= (pix0&(maskg|maska))+(pix1&(maskg|maska));\
-        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
-        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
-        g&= maskg|(2*maskg);\
-\
-        g>>=shg;\
-\
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
-    }\
-}
-
-BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
-BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
-
-#if HAVE_MMX
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat)
-{
-
-    if(srcFormat == PIX_FMT_BGR24){
-        __asm__ volatile(
-            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
-            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
-            :
-        );
-    }else{
-        __asm__ volatile(
-            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
-            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
-            :
-        );
-    }
-
-    __asm__ volatile(
-        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t"
-        "mov                        %2, %%"REG_a"   \n\t"
-        "pxor                    %%mm7, %%mm7       \n\t"
-        "1:                                         \n\t"
-        PREFETCH"               64(%0)              \n\t"
-        "movd                     (%0), %%mm0       \n\t"
-        "movd                    2(%0), %%mm1       \n\t"
-        "movd                    6(%0), %%mm2       \n\t"
-        "movd                    8(%0), %%mm3       \n\t"
-        "add                       $12, %0          \n\t"
-        "punpcklbw               %%mm7, %%mm0       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm2       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "pmaddwd                 %%mm5, %%mm0       \n\t"
-        "pmaddwd                 %%mm6, %%mm1       \n\t"
-        "pmaddwd                 %%mm5, %%mm2       \n\t"
-        "pmaddwd                 %%mm6, %%mm3       \n\t"
-        "paddd                   %%mm1, %%mm0       \n\t"
-        "paddd                   %%mm3, %%mm2       \n\t"
-        "paddd                   %%mm4, %%mm0       \n\t"
-        "paddd                   %%mm4, %%mm2       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
-        "packssdw                %%mm2, %%mm0       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
-        " js                        1b              \n\t"
-    : "+r" (src)
-    : "r" (dst+width), "g" (-width)
-    : "%"REG_a
-    );
-}
-
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
-{
-    __asm__ volatile(
-        "movq                    24+%4, %%mm6       \n\t"
-        "mov                        %3, %%"REG_a"   \n\t"
-        "pxor                    %%mm7, %%mm7       \n\t"
-        "1:                                         \n\t"
-        PREFETCH"               64(%0)              \n\t"
-        "movd                     (%0), %%mm0       \n\t"
-        "movd                    2(%0), %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm0       \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "movq                    %%mm0, %%mm2       \n\t"
-        "movq                    %%mm1, %%mm3       \n\t"
-        "pmaddwd                    %4, %%mm0       \n\t"
-        "pmaddwd                  8+%4, %%mm1       \n\t"
-        "pmaddwd                 16+%4, %%mm2       \n\t"
-        "pmaddwd                 %%mm6, %%mm3       \n\t"
-        "paddd                   %%mm1, %%mm0       \n\t"
-        "paddd                   %%mm3, %%mm2       \n\t"
-
-        "movd                    6(%0), %%mm1       \n\t"
-        "movd                    8(%0), %%mm3       \n\t"
-        "add                       $12, %0          \n\t"
-        "punpcklbw               %%mm7, %%mm1       \n\t"
-        "punpcklbw               %%mm7, %%mm3       \n\t"
-        "movq                    %%mm1, %%mm4       \n\t"
-        "movq                    %%mm3, %%mm5       \n\t"
-        "pmaddwd                    %4, %%mm1       \n\t"
-        "pmaddwd                  8+%4, %%mm3       \n\t"
-        "pmaddwd                 16+%4, %%mm4       \n\t"
-        "pmaddwd                 %%mm6, %%mm5       \n\t"
-        "paddd                   %%mm3, %%mm1       \n\t"
-        "paddd                   %%mm5, %%mm4       \n\t"
-
-        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
-        "paddd                   %%mm3, %%mm0       \n\t"
-        "paddd                   %%mm3, %%mm2       \n\t"
-        "paddd                   %%mm3, %%mm1       \n\t"
-        "paddd                   %%mm3, %%mm4       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
-        "psrad                     $15, %%mm1       \n\t"
-        "psrad                     $15, %%mm4       \n\t"
-        "packssdw                %%mm1, %%mm0       \n\t"
-        "packssdw                %%mm4, %%mm2       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "packuswb                %%mm2, %%mm2       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "movd                %%mm2, (%2, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
-        " js                        1b              \n\t"
-    : "+r" (src)
-    : "r" (dstU+width), "r" (dstV+width), "g" (-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
-    : "%"REG_a
-    );
-}
-#endif
-
-static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
-#else
-    int i;
-    for (i=0; i<width; i++)
-    {
-        int b= src[i*3+0];
-        int g= src[i*3+1];
-        int r= src[i*3+2];
-
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-    }
-#endif /* HAVE_MMX */
-}
-
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
-#else
-    int i;
-    for (i=0; i<width; i++)
-    {
-        int b= src1[3*i + 0];
-        int g= src1[3*i + 1];
-        int r= src1[3*i + 2];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-    }
-#endif /* HAVE_MMX */
-    assert(src1 == src2);
-}
-
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++)
-    {
-        int b= src1[6*i + 0] + src1[6*i + 3];
-        int g= src1[6*i + 1] + src1[6*i + 4];
-        int r= src1[6*i + 2] + src1[6*i + 5];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-    }
-    assert(src1 == src2);
-}
-
-static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
-#else
-    int i;
-    for (i=0; i<width; i++)
-    {
-        int r= src[i*3+0];
-        int g= src[i*3+1];
-        int b= src[i*3+2];
-
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-    }
-#endif
-}
-
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
-{
-#if HAVE_MMX
-    assert(src1==src2);
-    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
-#else
-    int i;
-    assert(src1==src2);
-    for (i=0; i<width; i++)
-    {
-        int r= src1[3*i + 0];
-        int g= src1[3*i + 1];
-        int b= src1[3*i + 2];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-    }
-#endif
-}
-
-/**
- * Converts packed 24-bit RGB (byte order R, G, B) to U and V with 2:1
- * horizontal subsampling. Each output sample is computed from the sum of
- * two neighbouring input pixels (6 input bytes); the extra factor of two
- * is removed by the RGB2YUV_SHIFT+1 shift.
- * src2 is expected to equal src1 (asserted); unused is ignored.
- */
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
-{
-    const uint8_t *pair = src1; /* first byte of the current pixel pair */
-    long x;
-
-    assert(src1==src2);
-    for (x = 0; x < width; x++, pair += 6) {
-        const int red   = pair[0] + pair[3];
-        const int green = pair[1] + pair[4];
-        const int blue  = pair[2] + pair[5];
-
-        dstU[x] = (RU*red + GU*green + BU*blue + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[x] = (RV*red + GV*green + BV*blue + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
-}
-
-
-/**
- * Maps one line of 8-bit palette indices to luma: each output byte is the
- * lowest 8 bits of the palette entry selected by the input byte.
- */
-static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
-{
-    const uint8_t *in = src;
-    uint8_t *out = dst;
-    long left;
-
-    for (left = width; left > 0; left--)
-        *out++ = pal[*in++] & 0xFF;
-}
-
-/**
- * Maps one line of 8-bit palette indices to chroma: U is taken from bits
- * 8..15 and V from bits 16..23 of the selected palette entry (the stores
- * into the uint8_t destinations drop the higher bits).
- * src2 is expected to equal src1 (asserted).
- */
-static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
-{
-    const uint8_t *in = src1;
-    uint8_t *outU = dstU;
-    uint8_t *outV = dstV;
-    long left;
-
-    assert(src1 == src2);
-    for (left = width; left > 0; left--) {
-        const int entry = pal[*in++];
-
-        *outU++ = entry >> 8;
-        *outV++ = entry >> 16;
-    }
-}
-
-/**
- * Expands one line of 1 bit-per-pixel data to 8-bit luma, with the bits
- * inverted first: a 0 bit becomes 255 and a 1 bit becomes 0.
- * Bits are consumed most-significant first within each source byte.
- *
- * All width pixels are written. The previous loop handled only width/8
- * whole source bytes, so when width was not a multiple of 8 the last
- * width%8 pixels of dst were left unwritten.
- * NOTE(review): assumes the source line holds (width+7)/8 bytes -- confirm
- * against the caller's buffers.
- *
- * unused is ignored.
- */
-static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
-    long x = 0;
-
-    while (x < width) {
-        const int d = ~src[x >> 3];            /* inverted source byte */
-        const long left = width - x;
-        const int bits = left < 8 ? (int)left : 8; /* partial last byte */
-        int j;
-
-        for (j = 0; j < bits; j++)
-            dst[x + j] = ((d >> (7 - j)) & 1) * 255;
-        x += bits;
-    }
-}
-
-/**
- * Expands one line of 1 bit-per-pixel data to 8-bit luma: a 1 bit becomes
- * 255 and a 0 bit becomes 0.
- * Bits are consumed most-significant first within each source byte.
- *
- * All width pixels are written. The previous loop handled only width/8
- * whole source bytes, so when width was not a multiple of 8 the last
- * width%8 pixels of dst were left unwritten.
- * NOTE(review): assumes the source line holds (width+7)/8 bytes -- confirm
- * against the caller's buffers.
- *
- * unused is ignored.
- */
-static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
-{
-    long x = 0;
-
-    while (x < width) {
-        const int d = src[x >> 3];             /* current source byte */
-        const long left = width - x;
-        const int bits = left < 8 ? (int)left : 8; /* partial last byte */
-        int j;
-
-        for (j = 0; j < bits; j++)
-            dst[x + j] = ((d >> (7 - j)) & 1) * 255;
-        x += bits;
-    }
-}
-
-/**
- * Horizontal FIR scaler (bilinear / bicubic scaling).
- *
- * As spelled out by the C fallback at the bottom, output sample i is
- *     sum over j in [0, filterSize) of src[filterPos[i] + j] * filter[filterSize*i + j]
- * shifted right by 7; the C fallback additionally clips the result to
- * (1<<15)-1.
- *
- * @param dst        output line, dstW 16-bit samples
- * @param dstW       number of output samples
- * @param src        8-bit input line
- * @param srcW       not used by the MMX and C paths, only forwarded to AltiVec
- * @param xInc       not used by the MMX and C paths, only forwarded to AltiVec
- * @param filter     coefficients, filterSize entries per output sample
- * @param filterPos  first source index used for each output sample
- * @param filterSize taps per output sample; the MMX build asserts that it is
- *                   a positive multiple of 4
- *
- * The MMX build has dedicated loops for filterSize 4 and 8 plus a generic
- * loop; all three compute two output samples per iteration.
- */
-static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
-                                  int16_t *filter, int16_t *filterPos, long filterSize)
-{
-#if HAVE_MMX
-    assert(filterSize % 4 == 0 && filterSize>0);
-    if (filterSize==4) // Always true for upscaling, sometimes for down, too.
-    {
-        // counter is a negative byte offset (2 bytes per filterPos/dst entry)
-        // that the asm loop counts up towards zero.
-        long counter= -2*dstW;
-        // Move the pointers to the end of their arrays so that the negative
-        // counter addresses them from the start.
-        filter-= counter*2;
-        filterPos-= counter/2;
-        dst-= counter/2;
-        __asm__ volatile(
-#if defined(PIC)
-        "push            %%"REG_b"              \n\t"
-#endif
-        "pxor                %%mm7, %%mm7       \n\t" // mm7 = 0, used to unpack bytes to words
-        "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
-        "mov             %%"REG_a", %%"REG_BP"  \n\t" // loop counter lives in the frame pointer register
-        ASMALIGN(4)
-        "1:                                     \n\t"
-        "movzwl   (%2, %%"REG_BP"), %%eax       \n\t" // filterPos of the first output sample
-        "movzwl  2(%2, %%"REG_BP"), %%ebx       \n\t" // filterPos of the second output sample
-        "movq  (%1, %%"REG_BP", 4), %%mm1       \n\t" // 4 coefficients of the first sample
-        "movq 8(%1, %%"REG_BP", 4), %%mm3       \n\t" // 4 coefficients of the second sample
-        "movd      (%3, %%"REG_a"), %%mm0       \n\t"
-        "movd      (%3, %%"REG_b"), %%mm2       \n\t"
-        "punpcklbw           %%mm7, %%mm0       \n\t"
-        "punpcklbw           %%mm7, %%mm2       \n\t"
-        "pmaddwd             %%mm1, %%mm0       \n\t"
-        "pmaddwd             %%mm2, %%mm3       \n\t"
-        "movq                %%mm0, %%mm4       \n\t"
-        "punpckldq           %%mm3, %%mm0       \n\t"
-        "punpckhdq           %%mm3, %%mm4       \n\t"
-        "paddd               %%mm4, %%mm0       \n\t"
-        "psrad                  $7, %%mm0       \n\t"
-        "packssdw            %%mm0, %%mm0       \n\t"
-        "movd                %%mm0, (%4, %%"REG_BP")    \n\t" // store two 16-bit results
-        "add                    $4, %%"REG_BP"  \n\t"
-        " jnc                   1b              \n\t" // the add carries once the negative counter reaches zero
-
-        "pop            %%"REG_BP"              \n\t"
-#if defined(PIC)
-        "pop             %%"REG_b"              \n\t"
-#endif
-        : "+a" (counter)
-        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
-#if !defined(PIC)
-        : "%"REG_b
-#endif
-        );
-    }
-    else if (filterSize==8)
-    {
-        // Same scheme as the 4-tap loop, with 8 coefficients (16 bytes) per
-        // output sample.
-        long counter= -2*dstW;
-        filter-= counter*4;
-        filterPos-= counter/2;
-        dst-= counter/2;
-        __asm__ volatile(
-#if defined(PIC)
-        "push             %%"REG_b"             \n\t"
-#endif
-        "pxor                 %%mm7, %%mm7      \n\t"
-        "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
-        "mov              %%"REG_a", %%"REG_BP" \n\t"
-        ASMALIGN(4)
-        "1:                                     \n\t"
-        "movzwl    (%2, %%"REG_BP"), %%eax      \n\t"
-        "movzwl   2(%2, %%"REG_BP"), %%ebx      \n\t"
-        "movq   (%1, %%"REG_BP", 8), %%mm1      \n\t" // taps 0..3 of the first sample
-        "movq 16(%1, %%"REG_BP", 8), %%mm3      \n\t" // taps 0..3 of the second sample
-        "movd       (%3, %%"REG_a"), %%mm0      \n\t"
-        "movd       (%3, %%"REG_b"), %%mm2      \n\t"
-        "punpcklbw            %%mm7, %%mm0      \n\t"
-        "punpcklbw            %%mm7, %%mm2      \n\t"
-        "pmaddwd              %%mm1, %%mm0      \n\t"
-        "pmaddwd              %%mm2, %%mm3      \n\t"
-
-        "movq  8(%1, %%"REG_BP", 8), %%mm1      \n\t" // taps 4..7 of the first sample
-        "movq 24(%1, %%"REG_BP", 8), %%mm5      \n\t" // taps 4..7 of the second sample
-        "movd      4(%3, %%"REG_a"), %%mm4      \n\t"
-        "movd      4(%3, %%"REG_b"), %%mm2      \n\t"
-        "punpcklbw            %%mm7, %%mm4      \n\t"
-        "punpcklbw            %%mm7, %%mm2      \n\t"
-        "pmaddwd              %%mm1, %%mm4      \n\t"
-        "pmaddwd              %%mm2, %%mm5      \n\t"
-        "paddd                %%mm4, %%mm0      \n\t"
-        "paddd                %%mm5, %%mm3      \n\t"
-        "movq                 %%mm0, %%mm4      \n\t"
-        "punpckldq            %%mm3, %%mm0      \n\t"
-        "punpckhdq            %%mm3, %%mm4      \n\t"
-        "paddd                %%mm4, %%mm0      \n\t"
-        "psrad                   $7, %%mm0      \n\t"
-        "packssdw             %%mm0, %%mm0      \n\t"
-        "movd                 %%mm0, (%4, %%"REG_BP")   \n\t"
-        "add                     $4, %%"REG_BP" \n\t"
-        " jnc                    1b             \n\t"
-
-        "pop             %%"REG_BP"             \n\t"
-#if defined(PIC)
-        "pop              %%"REG_b"             \n\t"
-#endif
-        : "+a" (counter)
-        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
-#if !defined(PIC)
-        : "%"REG_b
-#endif
-        );
-    }
-    else
-    {
-        // Generic filter size: the inner loop (label 2) walks the taps four
-        // at a time; offset is the address at which it stops.
-        uint8_t *offset = src+filterSize;
-        long counter= -2*dstW;
-        //filter-= counter*filterSize/2;
-        filterPos-= counter/2;
-        dst-= counter/2;
-        __asm__ volatile(
-        "pxor                  %%mm7, %%mm7     \n\t"
-        ASMALIGN(4)
-        "1:                                     \n\t"
-        "mov                      %2, %%"REG_c" \n\t"
-        "movzwl      (%%"REG_c", %0), %%eax     \n\t" // filterPos of the first output sample
-        "movzwl     2(%%"REG_c", %0), %%edx     \n\t" // filterPos of the second output sample
-        "mov                      %5, %%"REG_c" \n\t"
-        "pxor                  %%mm4, %%mm4     \n\t" // accumulator of the first sample
-        "pxor                  %%mm5, %%mm5     \n\t" // accumulator of the second sample
-        "2:                                     \n\t"
-        "movq                   (%1), %%mm1     \n\t"
-        "movq               (%1, %6), %%mm3     \n\t" // %6 = bytes in one sample's coefficient row
-        "movd (%%"REG_c", %%"REG_a"), %%mm0     \n\t"
-        "movd (%%"REG_c", %%"REG_d"), %%mm2     \n\t"
-        "punpcklbw             %%mm7, %%mm0     \n\t"
-        "punpcklbw             %%mm7, %%mm2     \n\t"
-        "pmaddwd               %%mm1, %%mm0     \n\t"
-        "pmaddwd               %%mm2, %%mm3     \n\t"
-        "paddd                 %%mm3, %%mm5     \n\t"
-        "paddd                 %%mm0, %%mm4     \n\t"
-        "add                      $8, %1        \n\t"
-        "add                      $4, %%"REG_c" \n\t"
-        "cmp                      %4, %%"REG_c" \n\t"
-        " jb                      2b            \n\t"
-        "add                      %6, %1        \n\t" // skip the second sample's row, already consumed
-        "movq                  %%mm4, %%mm0     \n\t"
-        "punpckldq             %%mm5, %%mm4     \n\t"
-        "punpckhdq             %%mm5, %%mm0     \n\t"
-        "paddd                 %%mm0, %%mm4     \n\t"
-        "psrad                    $7, %%mm4     \n\t"
-        "packssdw              %%mm4, %%mm4     \n\t"
-        "mov                      %3, %%"REG_a" \n\t"
-        "movd                  %%mm4, (%%"REG_a", %0)   \n\t"
-        "add                      $4, %0        \n\t"
-        " jnc                     1b            \n\t"
-
-        : "+r" (counter), "+r" (filter)
-        : "m" (filterPos), "m" (dst), "m"(offset),
-          "m" (src), "r" (filterSize*2)
-        : "%"REG_a, "%"REG_c, "%"REG_d
-        );
-    }
-#else
-#if HAVE_ALTIVEC
-    hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
-#else
-    // Plain C reference implementation.
-    int i;
-    for (i=0; i<dstW; i++)
-    {
-        int j;
-        int srcPos= filterPos[i];
-        int val=0;
-        //printf("filterPos: %d\n", filterPos[i]);
-        for (j=0; j<filterSize; j++)
-        {
-            //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
-            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
-        }
-        //filter += hFilterSize;
-        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
-        //dst[i] = val>>7;
-    }
-#endif /* HAVE_ALTIVEC */
-#endif /* HAVE_MMX */
-}
-/**
- * Horizontally scales one luma (Y) line into the temp buffer dst.
- *
- * Steps, in order:
- *  1. For the source formats listed in the if/else chain the line is first
- *     converted to 8-bit samples in formatConvBuffer and src is redirected
- *     to that buffer; any other format is read from src unchanged.
- *  2. The line is resampled, either by RENAME(hScale) with the hLumFilter
- *     tables or, when SWS_FAST_BILINEAR is set (and, on MMX builds,
- *     canMMX2BeUsed is nonzero), by the fast bilinear code below.
- *  3. If c->srcRange differs from c->dstRange and the destination format is
- *     neither RGB nor BGR, the scaled samples are remapped in place.
- *
- * funnyYCode is called from the MMX2 asm; presumably it points to scaler
- * code generated elsewhere -- its origin is not visible in this function.
- * pal is forwarded to every input converter.
- */
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
-                                   int flags, int canMMX2BeUsed, int16_t *hLumFilter,
-                                   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
-                                   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
-                                   int32_t *mmx2FilterPos, uint32_t *pal)
-{
-    // Step 1: input format conversion.
-    // 16-bit gray shares the packed-YUV luma extractors (GRAY16BE with
-    // yuy2ToY, GRAY16LE with uyvyToY).
-    if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
-    {
-        RENAME(yuy2ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
-    {
-        RENAME(uyvyToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    // Note: PIX_FMT_RGB* inputs go through the bgr*ToY converters here, and
-    // PIX_FMT_BGR* inputs (further down) through the rgb*ToY converters,
-    // except for the 24-bit formats, whose names match.
-    else if (srcFormat==PIX_FMT_RGB32)
-    {
-        RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_RGB32_1)
-    {
-        RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_BGR24)
-    {
-        RENAME(bgr24ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_BGR565)
-    {
-        RENAME(bgr16ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_BGR555)
-    {
-        RENAME(bgr15ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_BGR32)
-    {
-        RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_BGR32_1)
-    {
-        RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_RGB24)
-    {
-        RENAME(rgb24ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_RGB565)
-    {
-        RENAME(rgb16ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_RGB555)
-    {
-        RENAME(rgb15ToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    // All of these are converted through the palette table pal.
-    else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE  || srcFormat==PIX_FMT_RGB4_BYTE)
-    {
-        RENAME(palToY)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_MONOBLACK)
-    {
-        RENAME(monoblack2Y)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-    else if (srcFormat==PIX_FMT_MONOWHITE)
-    {
-        RENAME(monowhite2Y)(formatConvBuffer, src, srcW, pal);
-        src= formatConvBuffer;
-    }
-
-    // Step 2: horizontal resampling.
-#if HAVE_MMX
-    // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
-    if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
-#else
-    if (!(flags&SWS_FAST_BILINEAR))
-#endif
-    {
-        RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
-    }
-    else // fast bilinear upscale / crap downscale
-    {
-#if ARCH_X86
-#if HAVE_MMX2
-        int i;
-#if defined(PIC)
-        uint64_t ebxsave __attribute__((aligned(8))); // spill slot: the asm saves and restores REG_b itself in PIC builds
-#endif
-        if (canMMX2BeUsed)
-        {
-            __asm__ volatile(
-#if defined(PIC)
-            "mov               %%"REG_b", %5        \n\t"
-#endif
-            "pxor                  %%mm7, %%mm7     \n\t"
-            "mov                      %0, %%"REG_c" \n\t"
-            "mov                      %1, %%"REG_D" \n\t"
-            "mov                      %2, %%"REG_d" \n\t"
-            "mov                      %3, %%"REG_b" \n\t"
-            "xor               %%"REG_a", %%"REG_a" \n\t" // i
-            PREFETCH"        (%%"REG_c")            \n\t"
-            PREFETCH"      32(%%"REG_c")            \n\t"
-            PREFETCH"      64(%%"REG_c")            \n\t"
-
-            /* One FUNNY_Y_CODE step calls the code at funnyYCode (%4) and
-               then advances the source (REG_c) and destination (REG_D)
-               registers before clearing REG_a. */
-#if ARCH_X86_64
-
-#define FUNNY_Y_CODE \
-            "movl            (%%"REG_b"), %%esi     \n\t"\
-            "call                    *%4            \n\t"\
-            "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
-            "add               %%"REG_S", %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
-
-#else
-
-#define FUNNY_Y_CODE \
-            "movl (%%"REG_b"), %%esi        \n\t"\
-            "call         *%4                       \n\t"\
-            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-            "add               %%"REG_a", %%"REG_D" \n\t"\
-            "xor               %%"REG_a", %%"REG_a" \n\t"\
-
-#endif /* ARCH_X86_64 */
-
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-FUNNY_Y_CODE
-
-#if defined(PIC)
-            "mov                      %5, %%"REG_b" \n\t"
-#endif
-            :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
-            "m" (funnyYCode)
-#if defined(PIC)
-            ,"m" (ebxsave)
-#endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
-            ,"%"REG_b
-#endif
-            );
-            // Overwrite the trailing outputs whose source position is at or
-            // beyond the last input pixel with that pixel scaled by 128.
-            for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
-        }
-        else
-        {
-#endif /* HAVE_MMX2 */
-        // xInc is split into its integer part (added to xx with carry) and
-        // its 16-bit fraction (accumulated in cx).
-        long xInc_shr16 = xInc >> 16;
-        uint16_t xInc_mask = xInc & 0xffff;
-        //NO MMX just normal asm ...
-        __asm__ volatile(
-        "xor %%"REG_a", %%"REG_a"            \n\t" // i
-        "xor %%"REG_d", %%"REG_d"            \n\t" // xx
-        "xorl    %%ecx, %%ecx                \n\t" // 2*xalpha
-        ASMALIGN(4)
-        "1:                                  \n\t"
-        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
-        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
-        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*2*xalpha
-        "shll      $16, %%edi                \n\t"
-        "addl    %%edi, %%esi                \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
-        "mov        %1, %%"REG_D"            \n\t"
-        "shrl       $9, %%esi                \n\t"
-        "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-        "addw       %4, %%cx                 \n\t" //2*xalpha += xInc&0xFFFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
-
-        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
-        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
-        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*2*xalpha
-        "shll      $16, %%edi                \n\t"
-        "addl    %%edi, %%esi                \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
-        "mov        %1, %%"REG_D"            \n\t"
-        "shrl       $9, %%esi                \n\t"
-        "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t"
-        "addw       %4, %%cx                 \n\t" //2*xalpha += xInc&0xFFFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
-
-
-        "add        $2, %%"REG_a"            \n\t" // two output samples per iteration
-        "cmp        %2, %%"REG_a"            \n\t"
-        " jb        1b                       \n\t"
-
-
-        :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
-        : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
-        );
-#if HAVE_MMX2
-        } //if MMX2 can't be used
-#endif
-#else
-        // Portable C version: xpos is a 16.16 fixed-point source position.
-        int i;
-        unsigned int xpos=0;
-        for (i=0;i<dstWidth;i++)
-        {
-            register unsigned int xx=xpos>>16;
-            register unsigned int xalpha=(xpos&0xFFFF)>>9; // top 7 bits of the fraction
-            dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
-            xpos+=xInc;
-        }
-#endif /* ARCH_X86 */
-    }
-
-    // Step 3: luma range conversion, skipped for RGB/BGR destinations.
-    if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
-        int i;
-        //FIXME all pal and rgb srcFormats could do this conversion as well
-        //FIXME all scalers more complex than bilinear could do half of this transform
-        if(c->srcRange){
-            // 14071/16384 is about 219/255, i.e. the range is compressed;
-            // presumably a nonzero srcRange means full-range input -- confirm.
-            for (i=0; i<dstWidth; i++)
-                dst[i]= (dst[i]*14071 + 33561947)>>14;
-        }else{
-            // 19077/16384 is about 255/219, i.e. the range is expanded; with
-            // the input clamped to 30189 the result cannot exceed 32767.
-            for (i=0; i<dstWidth; i++)
-                dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
-        }
-    }
-}
-
-/**
- * Horizontally scales one line of both chroma planes into dst: the first
- * plane is written at dst[0..] and the second at dst[VOFW..].
- *
- * Steps, in order:
- *  1. For the packed YUV, RGB/BGR and paletted source formats listed in the
- *     if/else chain the chroma samples are first extracted into
- *     formatConvBuffer (first plane) and formatConvBuffer+VOFW (second
- *     plane), and src1/src2 are redirected there. For the RGB/BGR formats
- *     the *_half converters are used when c->chrSrcHSubSample is nonzero.
- *     Gray and 1-bit mono sources return immediately without writing dst.
- *  2. Both planes are resampled, either by RENAME(hScale) with the
- *     hChrFilter tables or, when SWS_FAST_BILINEAR is set (and, on MMX
- *     builds, canMMX2BeUsed is nonzero), by the fast bilinear code below.
- *  3. If c->srcRange differs from c->dstRange and the destination format is
- *     neither RGB nor BGR, the scaled samples are remapped in place.
- *
- * funnyUVCode is called from the MMX2 asm; presumably it points to scaler
- * code generated elsewhere -- its origin is not visible in this function.
- * pal is forwarded to every input converter.
- */
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
-                                   int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
-                                   int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
-                                   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
-                                   int32_t *mmx2FilterPos, uint32_t *pal)
-{
-    // Step 1: input format conversion.
-    if (srcFormat==PIX_FMT_YUYV422)
-    {
-        RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_UYVY422)
-    {
-        RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    // Note: PIX_FMT_RGB* inputs go through the bgr*ToUV converters here, and
-    // PIX_FMT_BGR* inputs (further down) through the rgb*ToUV converters,
-    // except for the 24-bit formats, whose names match.
-    else if (srcFormat==PIX_FMT_RGB32)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_RGB32_1)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
-        else
-            RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_BGR24)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(bgr24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_BGR565)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(bgr16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_BGR555)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(bgr15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_BGR32)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_BGR32_1)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
-        else
-            RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_RGB24)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(rgb24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_RGB565)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(rgb16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    else if (srcFormat==PIX_FMT_RGB555)
-    {
-        if(c->chrSrcHSubSample)
-            RENAME(rgb15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        else
-            RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-    // Gray and mono sources: nothing is scaled and dst is left untouched.
-    else if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
-    {
-        return;
-    }
-    // All of these are converted through the palette table pal.
-    else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE  || srcFormat==PIX_FMT_RGB4_BYTE)
-    {
-        RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
-        src1= formatConvBuffer;
-        src2= formatConvBuffer+VOFW;
-    }
-
-    // Step 2: horizontal resampling of both planes.
-#if HAVE_MMX
-    // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
-    if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
-#else
-    if (!(flags&SWS_FAST_BILINEAR))
-#endif
-    {
-        RENAME(hScale)(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-    }
-    else // fast bilinear upscale / crap downscale
-    {
-#if ARCH_X86
-#if HAVE_MMX2
-        int i;
-#if defined(PIC)
-        uint64_t ebxsave __attribute__((aligned(8))); // spill slot: the asm saves and restores REG_b itself in PIC builds
-#endif
-        if (canMMX2BeUsed)
-        {
-            __asm__ volatile(
-#if defined(PIC)
-            "mov          %%"REG_b", %6         \n\t"
-#endif
-            "pxor             %%mm7, %%mm7      \n\t"
-            "mov                 %0, %%"REG_c"  \n\t"
-            "mov                 %1, %%"REG_D"  \n\t"
-            "mov                 %2, %%"REG_d"  \n\t"
-            "mov                 %3, %%"REG_b"  \n\t"
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
-
-            /* One FUNNY_UV_CODE step calls the code at funnyUVCode (%4) and
-               then advances the source (REG_c) and destination (REG_D)
-               registers before clearing REG_a. */
-#if ARCH_X86_64
-
-#define FUNNY_UV_CODE \
-            "movl       (%%"REG_b"), %%esi      \n\t"\
-            "call               *%4             \n\t"\
-            "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
-            "add          %%"REG_S", %%"REG_c"  \n\t"\
-            "add          %%"REG_a", %%"REG_D"  \n\t"\
-            "xor          %%"REG_a", %%"REG_a"  \n\t"\
-
-#else
-
-#define FUNNY_UV_CODE \
-            "movl       (%%"REG_b"), %%esi      \n\t"\
-            "call               *%4             \n\t"\
-            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-            "add          %%"REG_a", %%"REG_D"  \n\t"\
-            "xor          %%"REG_a", %%"REG_a"  \n\t"\
-
-#endif /* ARCH_X86_64 */
-
-            /* First plane: src1 -> dst. */
-FUNNY_UV_CODE
-FUNNY_UV_CODE
-FUNNY_UV_CODE
-FUNNY_UV_CODE
-            /* Second plane: src2 (%5) -> dst + VOF bytes. */
-            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-            "mov                 %5, %%"REG_c"  \n\t" // src
-            "mov                 %1, %%"REG_D"  \n\t" // buf1
-            "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
-            PREFETCH"   (%%"REG_c")             \n\t"
-            PREFETCH" 32(%%"REG_c")             \n\t"
-            PREFETCH" 64(%%"REG_c")             \n\t"
-
-FUNNY_UV_CODE
-FUNNY_UV_CODE
-FUNNY_UV_CODE
-FUNNY_UV_CODE
-
-#if defined(PIC)
-            "mov %6, %%"REG_b"    \n\t"
-#endif
-            :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
-            "m" (funnyUVCode), "m" (src2)
-#if defined(PIC)
-            ,"m" (ebxsave)
-#endif
-            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
-             ,"%"REG_b
-#endif
-            );
-            // Overwrite the trailing outputs whose source position is at or
-            // beyond the last input sample with that sample scaled by 128.
-            for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
-            {
-                //printf("%d %d %d\n", dstWidth, i, srcW);
-                dst[i] = src1[srcW-1]*128;
-                dst[i+VOFW] = src2[srcW-1]*128;
-            }
-        }
-        else
-        {
-#endif /* HAVE_MMX2 */
-            // xInc is split into its integer part (added to xx with carry)
-            // and its 16-bit fraction (accumulated in cx).
-            long xInc_shr16 = (long) (xInc >> 16);
-            uint16_t xInc_mask = xInc & 0xffff;
-            __asm__ volatile(
-            "xor %%"REG_a", %%"REG_a"               \n\t" // i
-            "xor %%"REG_d", %%"REG_d"               \n\t" // xx
-            "xorl    %%ecx, %%ecx                   \n\t" // 2*xalpha
-            ASMALIGN(4)
-            "1:                                     \n\t"
-            "mov        %0, %%"REG_S"               \n\t"
-            "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
-            "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
-            "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
-            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*2*xalpha
-            "shll      $16, %%edi                   \n\t"
-            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
-            "mov        %1, %%"REG_D"               \n\t"
-            "shrl       $9, %%esi                   \n\t"
-            "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-
-            "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
-            "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
-            "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
-            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*2*xalpha
-            "shll      $16, %%edi                   \n\t"
-            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
-            "mov        %1, %%"REG_D"               \n\t"
-            "shrl       $9, %%esi                   \n\t"
-            "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t"
-
-            "addw       %4, %%cx                    \n\t" //2*xalpha += xInc&0xFFFF
-            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>16 + carry
-            "add        $1, %%"REG_a"               \n\t"
-            "cmp        %2, %%"REG_a"               \n\t"
-            " jb        1b                          \n\t"
-
-/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
-   which is needed to support GCC 4.0. */
-#if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-            :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
-#else
-            :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
-#endif
-            "r" (src2)
-            : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
-            );
-#if HAVE_MMX2
-        } //if MMX2 can't be used
-#endif
-#else
-        // Portable C version: xpos is a 16.16 fixed-point source position.
-        int i;
-        unsigned int xpos=0;
-        for (i=0;i<dstWidth;i++)
-        {
-            register unsigned int xx=xpos>>16;
-            register unsigned int xalpha=(xpos&0xFFFF)>>9; // top 7 bits of the fraction
-            // xalpha^127 equals 127-xalpha for a 7-bit xalpha, so the two
-            // weights sum to 127.
-            dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
-            dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
-            /* slower
-            dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
-            dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
-            */
-            xpos+=xInc;
-        }
-#endif /* ARCH_X86 */
-    }
-    // Step 3: chroma range conversion, skipped for RGB/BGR destinations.
-    if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
-        int i;
-        //FIXME all pal and rgb srcFormats could do this conversion as well
-        //FIXME all scalers more complex than bilinear could do half of this transform
-        if(c->srcRange){
-            // 1799/2048 is about 224/255, i.e. the range is compressed;
-            // presumably a nonzero srcRange means full-range input -- confirm.
-            for (i=0; i<dstWidth; i++){
-                dst[i     ]= (dst[i     ]*1799 + 4081085)>>11; //1469
-                dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
-            }
-        }else{
-            // 4663/4096 is about 255/224, i.e. the range is expanded; with
-            // the input clamped to 30775 the result cannot exceed 32767.
-            for (i=0; i<dstWidth; i++){
-                dst[i     ]= (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
-                dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
-            }
-        }
-    }
-}
-
-static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[]){
-
-    /* load a few things into local vars to make the code more readable? and faster */
-    const int srcW= c->srcW;
-    const int dstW= c->dstW;
-    const int dstH= c->dstH;
-    const int chrDstW= c->chrDstW;
-    const int chrSrcW= c->chrSrcW;
-    const int lumXInc= c->lumXInc;
-    const int chrXInc= c->chrXInc;
-    const int dstFormat= c->dstFormat;
-    const int srcFormat= c->srcFormat;
-    const int flags= c->flags;
-    const int canMMX2BeUsed= c->canMMX2BeUsed;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *hLumFilterPos= c->hLumFilterPos;
-    int16_t *hChrFilterPos= c->hChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
-    int16_t *hLumFilter= c->hLumFilter;
-    int16_t *hChrFilter= c->hChrFilter;
-    int32_t *lumMmxFilter= c->lumMmxFilter;
-    int32_t *chrMmxFilter= c->chrMmxFilter;
-    const int vLumFilterSize= c->vLumFilterSize;
-    const int vChrFilterSize= c->vChrFilterSize;
-    const int hLumFilterSize= c->hLumFilterSize;
-    const int hChrFilterSize= c->hChrFilterSize;
-    int16_t **lumPixBuf= c->lumPixBuf;
-    int16_t **chrPixBuf= c->chrPixBuf;
-    const int vLumBufSize= c->vLumBufSize;
-    const int vChrBufSize= c->vChrBufSize;
-    uint8_t *funnyYCode= c->funnyYCode;
-    uint8_t *funnyUVCode= c->funnyUVCode;
-    uint8_t *formatConvBuffer= c->formatConvBuffer;
-    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
-    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
-    int lastDstY;
-    uint32_t *pal=c->pal_yuv;
-
-    /* vars which will change and which we need to store back in the context */
-    int dstY= c->dstY;
-    int lumBufIndex= c->lumBufIndex;
-    int chrBufIndex= c->chrBufIndex;
-    int lastInLumBuf= c->lastInLumBuf;
-    int lastInChrBuf= c->lastInChrBuf;
-
-    if (isPacked(c->srcFormat)){
-        src[0]=
-        src[1]=
-        src[2]= src[0];
-        srcStride[0]=
-        srcStride[1]=
-        srcStride[2]= srcStride[0];
-    }
-    srcStride[1]<<= c->vChrDrop;
-    srcStride[2]<<= c->vChrDrop;
-
-    //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
-    //       (int)dst[0], (int)dst[1], (int)dst[2]);
-
-#if 0 //self test FIXME move to a vfilter or something
-    {
-    static volatile int i=0;
-    i++;
-    if (srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
-        selfTest(src, srcStride, c->srcW, c->srcH);
-    i--;
-    }
-#endif
-
-    //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
-    //dstStride[0],dstStride[1],dstStride[2]);
-
-    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
-    {
-        static int warnedAlready=0; //FIXME move this into the context perhaps
-        if (flags & SWS_PRINT_INFO && !warnedAlready)
-        {
-            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
-                   "         ->cannot do aligned memory accesses anymore\n");
-            warnedAlready=1;
-        }
-    }
-
-    /* Note the user might start scaling the picture in the middle so this
-       will not get executed. This is not really intended but works
-       currently, so people might do it. */
-    if (srcSliceY ==0){
-        lumBufIndex=0;
-        chrBufIndex=0;
-        dstY=0;
-        lastInLumBuf= -1;
-        lastInChrBuf= -1;
-    }
-
-    lastDstY= dstY;
-
-    for (;dstY < dstH; dstY++){
-        unsigned char *dest =dst[0]+dstStride[0]*dstY;
-        const int chrDstY= dstY>>c->chrDstVSubSample;
-        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
-        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
-
-        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
-        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-        const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
-        const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
-
-        //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
-        // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize,  c->chrSrcVSubSample);
-        //handle holes (FAST_BILINEAR & weird filters)
-        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
-        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
-        //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
-        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
-        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
-
-        // Do we have enough lines in this slice to output the dstY line
-        if (lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
-        {
-            //Do horizontal scaling
-            while(lastInLumBuf < lastLumSrcY)
-            {
-                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
-                lumBufIndex++;
-                //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf,  lastLumSrcY);
-                assert(lumBufIndex < 2*vLumBufSize);
-                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
-                assert(lastInLumBuf + 1 - srcSliceY >= 0);
-                //printf("%d %d\n", lumBufIndex, vLumBufSize);
-                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
-                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
-                                funnyYCode, c->srcFormat, formatConvBuffer,
-                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
-                lastInLumBuf++;
-            }
-            while(lastInChrBuf < lastChrSrcY)
-            {
-                uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
-                uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
-                chrBufIndex++;
-                assert(chrBufIndex < 2*vChrBufSize);
-                assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
-                assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
-                //FIXME replace parameters through context struct (some at least)
-
-                if (!(isGray(srcFormat) || isGray(dstFormat)))
-                    RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
-                                    flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
-                                    funnyUVCode, c->srcFormat, formatConvBuffer,
-                                    c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
-                lastInChrBuf++;
-            }
-            //wrap buf index around to stay inside the ring buffer
-            if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
-            if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
-        }
-        else // not enough lines left in this slice -> load the rest in the buffer
-        {
-            /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
-            firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
-            lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
-            vChrBufSize, vLumBufSize);*/
-
-            //Do horizontal scaling
-            while(lastInLumBuf+1 < srcSliceY + srcSliceH)
-            {
-                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
-                lumBufIndex++;
-                assert(lumBufIndex < 2*vLumBufSize);
-                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
-                assert(lastInLumBuf + 1 - srcSliceY >= 0);
-                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
-                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
-                                funnyYCode, c->srcFormat, formatConvBuffer,
-                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
-                lastInLumBuf++;
-            }
-            while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
-            {
-                uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
-                uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
-                chrBufIndex++;
-                assert(chrBufIndex < 2*vChrBufSize);
-                assert(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH);
-                assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
-
-                if (!(isGray(srcFormat) || isGray(dstFormat)))
-                    RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
-                            flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
-                            funnyUVCode, c->srcFormat, formatConvBuffer,
-                            c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
-                lastInChrBuf++;
-            }
-            //wrap buf index around to stay inside the ring buffer
-            if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
-            if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
-            break; //we can't output a dstY line so let's try with the next slice
-        }
-
-#if HAVE_MMX
-        c->blueDither= ff_dither8[dstY&1];
-        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
-            c->greenDither= ff_dither8[dstY&1];
-        else
-            c->greenDither= ff_dither4[dstY&1];
-        c->redDither= ff_dither8[(dstY+1)&1];
-#endif
-        if (dstY < dstH-2)
-        {
-            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-#if HAVE_MMX
-            int i;
-        if (flags & SWS_ACCURATE_RND){
-            int s= APCK_SIZE / 8;
-            for (i=0; i<vLumFilterSize; i+=2){
-                *(void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
-                *(void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
-                          lumMmxFilter[s*i+APCK_COEF/4  ]=
-                          lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
-                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
-            }
-            for (i=0; i<vChrFilterSize; i+=2){
-                *(void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
-                *(void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
-                          chrMmxFilter[s*i+APCK_COEF/4  ]=
-                          chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
-                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
-            }
-        }else{
-            for (i=0; i<vLumFilterSize; i++)
-            {
-                lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
-                lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
-                lumMmxFilter[4*i+2]=
-                lumMmxFilter[4*i+3]=
-                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
-            }
-            for (i=0; i<vChrFilterSize; i++)
-            {
-                chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
-                chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
-                chrMmxFilter[4*i+2]=
-                chrMmxFilter[4*i+3]=
-                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
-            }
-        }
-#endif
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                RENAME(yuv2nv12X)(c,
-                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                    dest, uDest, dstW, chrDstW, dstFormat);
-            }
-            else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 like
-            {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
-                {
-                    int16_t *lumBuf = lumPixBuf[0];
-                    int16_t *chrBuf= chrPixBuf[0];
-                    RENAME(yuv2yuv1)(c, lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
-                }
-                else //General YV12
-                {
-                    RENAME(yuv2yuvX)(c,
-                        vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                        dest, uDest, vDest, dstW, chrDstW);
-                }
-            }
-            else
-            {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
-                if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB
-                {
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    if(flags & SWS_FULL_CHR_H_INT){
-                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
-                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                            dest, dstW, dstY);
-                    }else{
-                        RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
-                            dest, dstW, chrAlpha, dstFormat, flags, dstY);
-                    }
-                }
-                else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
-                {
-                    int lumAlpha= vLumFilter[2*dstY+1];
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    lumMmxFilter[2]=
-                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
-                    chrMmxFilter[2]=
-                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    if(flags & SWS_FULL_CHR_H_INT){
-                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
-                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                            dest, dstW, dstY);
-                    }else{
-                        RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
-                            dest, dstW, lumAlpha, chrAlpha, dstY);
-                    }
-                }
-                else //general RGB
-                {
-                    if(flags & SWS_FULL_CHR_H_INT){
-                        yuv2rgbXinC_full(c,
-                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                            dest, dstW, dstY);
-                    }else{
-                        RENAME(yuv2packedX)(c,
-                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                            dest, dstW, dstY);
-                    }
-                }
-            }
-        }
-        else // hmm looks like we can't use MMX here without overwriting this array's tail
-        {
-            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12XinC(
-                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                    dest, uDest, dstW, chrDstW, dstFormat);
-            }
-            else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12
-            {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2yuvXinC(
-                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                    dest, uDest, vDest, dstW, chrDstW);
-            }
-            else
-            {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
-                if(flags & SWS_FULL_CHR_H_INT){
-                    yuv2rgbXinC_full(c,
-                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                        dest, dstW, dstY);
-                }else{
-                    yuv2packedXinC(c,
-                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
-                        dest, dstW, dstY);
-                }
-            }
-        }
-    }
-
-#if HAVE_MMX
-    __asm__ volatile(SFENCE:::"memory");
-    __asm__ volatile(EMMS:::"memory");
-#endif
-    /* store changed local vars back in the context */
-    c->dstY= dstY;
-    c->lumBufIndex= lumBufIndex;
-    c->chrBufIndex= chrBufIndex;
-    c->lastInLumBuf= lastInLumBuf;
-    c->lastInChrBuf= lastInChrBuf;
-
-    return dstY - lastDstY;
-}
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
deleted file mode 100644
index 65af412c2c..0000000000
--- a/libswscale/yuv2rgb.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * software YUV to RGB converter
- *
- * Copyright (C) 2009 Konstantin Shishkov
- *
- * MMX/MMX2 template stuff (needed for fast movntq support),
- * 1,4,8bpp support and context / deglobalize stuff
- * by Michael Niedermayer (michaelni@gmx.at)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <assert.h>
-
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-#define DITHER1XBPP // only for MMX
-
-extern const uint8_t dither_8x8_32[8][8];
-extern const uint8_t dither_8x8_73[8][8];
-extern const uint8_t dither_8x8_220[8][8];
-
-#if HAVE_MMX && CONFIG_GPL
-
-/* hope these constant values are cache line aligned */
-DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw)   = 0x00ff00ff00ff00ffULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
-DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
-
-//MMX versions
-#undef RENAME
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 0
-#define RENAME(a) a ## _MMX
-#include "yuv2rgb_template.c"
-
-//MMX2 versions
-#undef RENAME
-#undef HAVE_MMX2
-#define HAVE_MMX2 1
-#define RENAME(a) a ## _MMX2
-#include "yuv2rgb_template.c"
-
-#endif /* HAVE_MMX && CONFIG_GPL */
-
-const int32_t ff_yuv2rgb_coeffs[8][4] = {
-    {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
-    {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
-    {104597, 132201, 25675, 53279}, /* unspecified */
-    {104597, 132201, 25675, 53279}, /* reserved */
-    {104448, 132798, 24759, 53109}, /* FCC */
-    {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
-    {104597, 132201, 25675, 53279}, /* SMPTE 170M */
-    {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
-};
-
-#define LOADCHROMA(i)                               \
-    U = pu[i];                                      \
-    V = pv[i];                                      \
-    r = (void *)c->table_rV[V];                     \
-    g = (void *)(c->table_gU[U] + c->table_gV[V]);  \
-    b = (void *)c->table_bU[U];
-
-#define PUTRGB(dst,src,i,o)          \
-    Y = src[2*i+o];                  \
-    dst[2*i  ] = r[Y] + g[Y] + b[Y]; \
-    Y = src[2*i+1-o];                \
-    dst[2*i+1] = r[Y] + g[Y] + b[Y];
-
-#define PUTRGB24(dst,src,i)                                  \
-    Y = src[2*i];                                            \
-    dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
-    Y = src[2*i+1];                                          \
-    dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
-
-#define PUTBGR24(dst,src,i)                                  \
-    Y = src[2*i];                                            \
-    dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
-    Y = src[2*i+1];                                          \
-    dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
-
-#define YUV2RGBFUNC(func_name, dst_type) \
-static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
-                     int srcSliceH, uint8_t* dst[], int dstStride[]){\
-    int y;\
-\
-    if (c->srcFormat == PIX_FMT_YUV422P) {\
-        srcStride[1] *= 2;\
-        srcStride[2] *= 2;\
-    }\
-    for (y=0; y<srcSliceH; y+=2) {\
-        dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY  )*dstStride[0]);\
-        dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
-        dst_type av_unused *r, *b;\
-        dst_type *g;\
-        uint8_t *py_1 = src[0] + y*srcStride[0];\
-        uint8_t *py_2 = py_1 + srcStride[0];\
-        uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
-        uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
-        unsigned int h_size = c->dstW>>3;\
-        while (h_size--) {\
-            int av_unused U, V;\
-            int Y;\
-
-#define ENDYUV2RGBLINE(dst_delta)\
-            pu += 4;\
-            pv += 4;\
-            py_1 += 8;\
-            py_2 += 8;\
-            dst_1 += dst_delta;\
-            dst_2 += dst_delta;\
-        }\
-        if (c->dstW & 4) {\
-            int av_unused Y, U, V;\
-
-#define ENDYUV2RGBFUNC()\
-        }\
-    }\
-    return srcSliceH;\
-}
-
-#define CLOSEYUV2RGBFUNC(dst_delta)\
-    ENDYUV2RGBLINE(dst_delta)\
-    ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_32, uint32_t)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0,0);
-    PUTRGB(dst_2,py_2,0,1);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1,1);
-    PUTRGB(dst_1,py_1,1,0);
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1,1);
-    PUTRGB(dst_1,py_1,1,0);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2,0);
-    PUTRGB(dst_2,py_2,2,1);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3,1);
-    PUTRGB(dst_1,py_1,3,0);
-ENDYUV2RGBLINE(8)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0,0);
-    PUTRGB(dst_2,py_2,0,1);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1,1);
-    PUTRGB(dst_1,py_1,1,0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1,py_1,0);
-    PUTRGB24(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2,py_2,1);
-    PUTRGB24(dst_1,py_1,1);
-
-    LOADCHROMA(2);
-    PUTRGB24(dst_1,py_1,2);
-    PUTRGB24(dst_2,py_2,2);
-
-    LOADCHROMA(3);
-    PUTRGB24(dst_2,py_2,3);
-    PUTRGB24(dst_1,py_1,3);
-ENDYUV2RGBLINE(24)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1,py_1,0);
-    PUTRGB24(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2,py_2,1);
-    PUTRGB24(dst_1,py_1,1);
-ENDYUV2RGBFUNC()
-
-// only trivial mods from yuv2rgb_c_24_rgb
-YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1,py_1,0);
-    PUTBGR24(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2,py_2,1);
-    PUTBGR24(dst_1,py_1,1);
-
-    LOADCHROMA(2);
-    PUTBGR24(dst_1,py_1,2);
-    PUTBGR24(dst_2,py_2,2);
-
-    LOADCHROMA(3);
-    PUTBGR24(dst_2,py_2,3);
-    PUTBGR24(dst_1,py_1,3);
-ENDYUV2RGBLINE(24)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1,py_1,0);
-    PUTBGR24(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2,py_2,1);
-    PUTBGR24(dst_1,py_1,1);
-ENDYUV2RGBFUNC()
-
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_16, uint16_t)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0,0);
-    PUTRGB(dst_2,py_2,0,1);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1,1);
-    PUTRGB(dst_1,py_1,1,0);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2,0);
-    PUTRGB(dst_2,py_2,2,1);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3,1);
-    PUTRGB(dst_1,py_1,3,0);
-CLOSEYUV2RGBFUNC(8)
-
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8, uint8_t)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0,0);
-    PUTRGB(dst_2,py_2,0,1);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1,1);
-    PUTRGB(dst_1,py_1,1,0);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2,0);
-    PUTRGB(dst_2,py_2,2,1);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3,1);
-    PUTRGB(dst_1,py_1,3,0);
-CLOSEYUV2RGBFUNC(8)
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t)
-    const uint8_t *d32 = dither_8x8_32[y&7];
-    const uint8_t *d64 = dither_8x8_73[y&7];
-#define PUTRGB8(dst,src,i,o)                                    \
-    Y = src[2*i];                                               \
-    dst[2*i]   = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
-    Y = src[2*i+1];                                             \
-    dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
-
-    LOADCHROMA(0);
-    PUTRGB8(dst_1,py_1,0,0);
-    PUTRGB8(dst_2,py_2,0,0+8);
-
-    LOADCHROMA(1);
-    PUTRGB8(dst_2,py_2,1,2+8);
-    PUTRGB8(dst_1,py_1,1,2);
-
-    LOADCHROMA(2);
-    PUTRGB8(dst_1,py_1,2,4);
-    PUTRGB8(dst_2,py_2,2,4+8);
-
-    LOADCHROMA(3);
-    PUTRGB8(dst_2,py_2,3,6+8);
-    PUTRGB8(dst_1,py_1,3,6);
-CLOSEYUV2RGBFUNC(8)
-
-
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_4, uint8_t)
-    int acc;
-#define PUTRGB4(dst,src,i)          \
-    Y = src[2*i];                   \
-    acc = r[Y] + g[Y] + b[Y];       \
-    Y = src[2*i+1];                 \
-    acc |= (r[Y] + g[Y] + b[Y])<<4; \
-    dst[i] = acc;
-
-    LOADCHROMA(0);
-    PUTRGB4(dst_1,py_1,0);
-    PUTRGB4(dst_2,py_2,0);
-
-    LOADCHROMA(1);
-    PUTRGB4(dst_2,py_2,1);
-    PUTRGB4(dst_1,py_1,1);
-
-    LOADCHROMA(2);
-    PUTRGB4(dst_1,py_1,2);
-    PUTRGB4(dst_2,py_2,2);
-
-    LOADCHROMA(3);
-    PUTRGB4(dst_2,py_2,3);
-    PUTRGB4(dst_1,py_1,3);
-CLOSEYUV2RGBFUNC(4)
-
-YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t)
-    const uint8_t *d64 =  dither_8x8_73[y&7];
-    const uint8_t *d128 = dither_8x8_220[y&7];
-    int acc;
-
-#define PUTRGB4D(dst,src,i,o)                                     \
-    Y = src[2*i];                                                 \
-    acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]];        \
-    Y = src[2*i+1];                                               \
-    acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4;  \
-    dst[i]= acc;
-
-    LOADCHROMA(0);
-    PUTRGB4D(dst_1,py_1,0,0);
-    PUTRGB4D(dst_2,py_2,0,0+8);
-
-    LOADCHROMA(1);
-    PUTRGB4D(dst_2,py_2,1,2+8);
-    PUTRGB4D(dst_1,py_1,1,2);
-
-    LOADCHROMA(2);
-    PUTRGB4D(dst_1,py_1,2,4);
-    PUTRGB4D(dst_2,py_2,2,4+8);
-
-    LOADCHROMA(3);
-    PUTRGB4D(dst_2,py_2,3,6+8);
-    PUTRGB4D(dst_1,py_1,3,6);
-CLOSEYUV2RGBFUNC(4)
-
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t)
-    LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0,0);
-    PUTRGB(dst_2,py_2,0,1);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1,1);
-    PUTRGB(dst_1,py_1,1,0);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2,0);
-    PUTRGB(dst_2,py_2,2,1);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3,1);
-    PUTRGB(dst_1,py_1,3,0);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t)
-    const uint8_t *d64 =  dither_8x8_73[y&7];
-    const uint8_t *d128 = dither_8x8_220[y&7];
-
-#define PUTRGB4DB(dst,src,i,o)                                    \
-    Y = src[2*i];                                                 \
-    dst[2*i]   = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
-    Y = src[2*i+1];                                               \
-    dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
-
-    LOADCHROMA(0);
-    PUTRGB4DB(dst_1,py_1,0,0);
-    PUTRGB4DB(dst_2,py_2,0,0+8);
-
-    LOADCHROMA(1);
-    PUTRGB4DB(dst_2,py_2,1,2+8);
-    PUTRGB4DB(dst_1,py_1,1,2);
-
-    LOADCHROMA(2);
-    PUTRGB4DB(dst_1,py_1,2,4);
-    PUTRGB4DB(dst_2,py_2,2,4+8);
-
-    LOADCHROMA(3);
-    PUTRGB4DB(dst_2,py_2,3,6+8);
-    PUTRGB4DB(dst_1,py_1,3,6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t)
-        const uint8_t *d128 = dither_8x8_220[y&7];
-        char out_1 = 0, out_2 = 0;
-        g= c->table_gU[128] + c->table_gV[128];
-
-#define PUTRGB1(out,src,i,o)    \
-    Y = src[2*i];               \
-    out+= out + g[Y+d128[0+o]]; \
-    Y = src[2*i+1];             \
-    out+= out + g[Y+d128[1+o]];
-
-    PUTRGB1(out_1,py_1,0,0);
-    PUTRGB1(out_2,py_2,0,0+8);
-
-    PUTRGB1(out_2,py_2,1,2+8);
-    PUTRGB1(out_1,py_1,1,2);
-
-    PUTRGB1(out_1,py_1,2,4);
-    PUTRGB1(out_2,py_2,2,4+8);
-
-    PUTRGB1(out_2,py_2,3,6+8);
-    PUTRGB1(out_1,py_1,3,6);
-
-    dst_1[0]= out_1;
-    dst_2[0]= out_2;
-CLOSEYUV2RGBFUNC(1)
-
-SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c)
-{
-    SwsFunc t = NULL;
-#if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL
-    if (c->flags & SWS_CPU_CAPS_MMX2) {
-        switch (c->dstFormat) {
-        case PIX_FMT_RGB32:  return yuv420_rgb32_MMX2;
-        case PIX_FMT_BGR24:  return yuv420_rgb24_MMX2;
-        case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
-        case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
-        }
-    }
-    if (c->flags & SWS_CPU_CAPS_MMX) {
-        switch (c->dstFormat) {
-        case PIX_FMT_RGB32:  return yuv420_rgb32_MMX;
-        case PIX_FMT_BGR24:  return yuv420_rgb24_MMX;
-        case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
-        case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
-        }
-    }
-#endif
-#if HAVE_VIS
-    t = sws_yuv2rgb_init_vis(c);
-#endif
-#if CONFIG_MLIB
-    t = sws_yuv2rgb_init_mlib(c);
-#endif
-#if HAVE_ALTIVEC && CONFIG_GPL
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
-        t = sws_yuv2rgb_init_altivec(c);
-#endif
-
-#if ARCH_BFIN
-    if (c->flags & SWS_CPU_CAPS_BFIN)
-        t = sws_ff_bfin_yuv2rgb_get_func_ptr(c);
-#endif
-
-    if (t)
-        return t;
-
-    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
-
-    switch (c->dstFormat) {
-    case PIX_FMT_BGR32_1:
-    case PIX_FMT_RGB32_1:
-    case PIX_FMT_BGR32:
-    case PIX_FMT_RGB32:      return yuv2rgb_c_32;
-    case PIX_FMT_RGB24:      return yuv2rgb_c_24_rgb;
-    case PIX_FMT_BGR24:      return yuv2rgb_c_24_bgr;
-    case PIX_FMT_RGB565:
-    case PIX_FMT_BGR565:
-    case PIX_FMT_RGB555:
-    case PIX_FMT_BGR555:     return yuv2rgb_c_16;
-    case PIX_FMT_RGB8:
-    case PIX_FMT_BGR8:       return yuv2rgb_c_8_ordered_dither;
-    case PIX_FMT_RGB4:
-    case PIX_FMT_BGR4:       return yuv2rgb_c_4_ordered_dither;
-    case PIX_FMT_RGB4_BYTE:
-    case PIX_FMT_BGR4_BYTE:  return yuv2rgb_c_4b_ordered_dither;
-    case PIX_FMT_MONOBLACK:  return yuv2rgb_c_1_ordered_dither;
-    default:
-        assert(0);
-    }
-    return NULL;
-}
-
-static void fill_table(uint8_t* table[256], const int elemsize, const int inc, uint8_t *y_table)
-{
-    int i;
-    int64_t cb = 0;
-
-    y_table -= elemsize * (inc >> 9);
-
-    for (i = 0; i < 256; i++) {
-        table[i] = y_table + elemsize * (cb >> 16);
-        cb += inc;
-    }
-}
-
-static void fill_gv_table(int table[256], const int elemsize, const int inc)
-{
-    int i;
-    int64_t cb = 0;
-    int off = -(inc >> 9);
-
-    for (i = 0; i < 256; i++) {
-        table[i] = elemsize * (off + (cb >> 16));
-        cb += inc;
-    }
-}
-
-av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
-                                      int brightness, int contrast, int saturation)
-{
-    const int isRgb =      c->dstFormat==PIX_FMT_RGB32
-                        || c->dstFormat==PIX_FMT_RGB32_1
-                        || c->dstFormat==PIX_FMT_BGR24
-                        || c->dstFormat==PIX_FMT_RGB565
-                        || c->dstFormat==PIX_FMT_RGB555
-                        || c->dstFormat==PIX_FMT_RGB8
-                        || c->dstFormat==PIX_FMT_RGB4
-                        || c->dstFormat==PIX_FMT_RGB4_BYTE
-                        || c->dstFormat==PIX_FMT_MONOBLACK;
-    const int bpp = fmt_depth(c->dstFormat);
-    uint8_t *y_table;
-    uint16_t *y_table16;
-    uint32_t *y_table32;
-    int i, base, rbase, gbase, bbase, abase;
-    const int yoffs = fullRange ? 384 : 326;
-
-    int64_t crv =  inv_table[0];
-    int64_t cbu =  inv_table[1];
-    int64_t cgu = -inv_table[2];
-    int64_t cgv = -inv_table[3];
-    int64_t cy  = 1<<16;
-    int64_t oy  = 0;
-
-    int64_t yb = 0;
-
-    if (!fullRange) {
-        cy = (cy*255) / 219;
-        oy = 16<<16;
-    } else {
-        crv = (crv*224) / 255;
-        cbu = (cbu*224) / 255;
-        cgu = (cgu*224) / 255;
-        cgv = (cgv*224) / 255;
-    }
-
-    cy  = (cy *contrast             ) >> 16;
-    crv = (crv*contrast * saturation) >> 32;
-    cbu = (cbu*contrast * saturation) >> 32;
-    cgu = (cgu*contrast * saturation) >> 32;
-    cgv = (cgv*contrast * saturation) >> 32;
-    oy -= 256*brightness;
-
-    //scale coefficients by cy
-    crv = ((crv << 16) + 0x8000) / cy;
-    cbu = ((cbu << 16) + 0x8000) / cy;
-    cgu = ((cgu << 16) + 0x8000) / cy;
-    cgv = ((cgv << 16) + 0x8000) / cy;
-
-    av_free(c->yuvTable);
-
-    switch (bpp) {
-    case 1:
-        c->yuvTable = av_malloc(1024);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024-110; i++) {
-            y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
-            yb += cy;
-        }
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 4:
-    case 4|128:
-        rbase = isRgb ? 3 : 0;
-        gbase = 1;
-        bbase = isRgb ? 0 : 3;
-        c->yuvTable = av_malloc(1024*3);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024-110; i++) {
-            int yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table[i+110     ] =  (yval >> 7)       << rbase;
-            y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
-            y_table[i+110+2048] =  (yval >> 7)       << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 8:
-        rbase = isRgb ? 5 : 0;
-        gbase = isRgb ? 2 : 3;
-        bbase = isRgb ? 0 : 6;
-        c->yuvTable = av_malloc(1024*3);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024-38; i++) {
-            int yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table[i+16     ] = ((yval + 18) / 36) << rbase;
-            y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
-            y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 15:
-    case 16:
-        rbase = isRgb ? bpp - 5 : 0;
-        gbase = 5;
-        bbase = isRgb ? 0 : (bpp - 5);
-        c->yuvTable = av_malloc(1024*3*2);
-        y_table16 = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024; i++) {
-            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table16[i     ] = (yval >> 3)          << rbase;
-            y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
-            y_table16[i+2048] = (yval >> 3)          << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
-        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
-        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
-        fill_gv_table(c->table_gV, 2, cgv);
-        break;
-    case 24:
-        c->yuvTable = av_malloc(1024);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024; i++) {
-            y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
-            yb += cy;
-        }
-        fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs);
-        fill_gv_table(c->table_gV, 1, cgv);
-        break;
-    case 32:
-        base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
-        rbase = base + (isRgb ? 16 : 0);
-        gbase = base + 8;
-        bbase = base + (isRgb ? 0 : 16);
-        abase = (base + 24) & 31;
-        c->yuvTable = av_malloc(1024*3*4);
-        y_table32 = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024; i++) {
-            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table32[i     ] = (yval << rbase) + (255 << abase);
-            y_table32[i+1024] = yval << gbase;
-            y_table32[i+2048] = yval << bbase;
-            yb += cy;
-        }
-        fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
-        fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
-        fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
-        fill_gv_table(c->table_gV, 4, cgv);
-        break;
-    default:
-        c->yuvTable = NULL;
-        av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
-        return -1;
-    }
-    return 0;
-}
diff --git a/libswscale/yuv2rgb_altivec.c b/libswscale/yuv2rgb_altivec.c
deleted file mode 100644
index b3a87a0360..0000000000
--- a/libswscale/yuv2rgb_altivec.c
+++ /dev/null
@@ -1,962 +0,0 @@
-/*
- * AltiVec acceleration for colorspace conversion
- *
- * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/*
-Convert I420 YV12 to RGB in various formats,
-  it rejects images that are not in 420 formats,
-  it rejects images that don't have widths of multiples of 16,
-  it rejects images that don't have heights of multiples of 2.
-Reject defers to C simulation code.
-
-Lots of optimizations to be done here.
-
-1. Need to fix saturation code. I just couldn't get it to fly with packs
-   and adds, so we currently use max/min to clip.
-
-2. The inefficient use of chroma loading needs a bit of brushing up.
-
-3. Analysis of pipeline stalls needs to be done. Use shark to identify
-   pipeline stalls.
-
-
-MODIFIED to calculate coeffs from currently selected color space.
-MODIFIED core to be a macro where you specify the output format.
-ADDED UYVY conversion which is never called due to some thing in swscale.
-CORRECTED algorithim selection to be strict on input formats.
-ADDED runtime detection of AltiVec.
-
-ADDED altivec_yuv2packedX vertical scl + RGB converter
-
-March 27,2004
-PERFORMANCE ANALYSIS
-
-The C version uses 25% of the processor or ~250Mips for D1 video rawvideo
-used as test.
-The AltiVec version uses 10% of the processor or ~100Mips for D1 video
-same sequence.
-
-720 * 480 * 30  ~10MPS
-
-so we have roughly 10 clocks per pixel. This is too high, something has
-to be wrong.
-
-OPTIMIZED clip codes to utilize vec_max and vec_packs removing the
-need for vec_min.
-
-OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to have
-the input video frame, it was just decompressed so it probably resides in L1
-caches. However, we are creating the output video stream. This needs to use the
-DSTST instruction to optimize for the cache. We couple this with the fact that
-we are not going to be visiting the input buffer again so we mark it Least
-Recently Used. This shaves 25% of the processor cycles off.
-
-Now memcpy is the largest mips consumer in the system, probably due
-to the inefficient X11 stuff.
-
-GL libraries seem to be very slow on this machine 1.33Ghz PB running
-Jaguar, this is not the case for my 1Ghz PB.  I thought it might be
-a versioning issue, however I have libGL.1.2.dylib for both
-machines. (We need to figure this out now.)
-
-GL2 libraries work now with patch for RGB32.
-
-NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor.
-
-Integrated luma prescaling adjustment for saturation/contrast/brightness
-adjustment.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <assert.h>
-#include "config.h"
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-#undef PROFILE_THE_BEAST
-#undef INC_SCALING
-
-typedef unsigned char ubyte;
-typedef signed char   sbyte;
-
-
-/* RGB interleaver, 16 planar pels 8-bit samples per channel in
-   homogeneous vector registers x0,x1,x2 are interleaved with the
-   following technique:
-
-      o0 = vec_mergeh (x0,x1);
-      o1 = vec_perm (o0, x2, perm_rgb_0);
-      o2 = vec_perm (o0, x2, perm_rgb_1);
-      o3 = vec_mergel (x0,x1);
-      o4 = vec_perm (o3,o2,perm_rgb_2);
-      o5 = vec_perm (o3,o2,perm_rgb_3);
-
-  perm_rgb_0:   o0(RG).h v1(B) --> o1*
-              0   1  2   3   4
-             rgbr|gbrg|brgb|rgbr
-             0010 0100 1001 0010
-             0102 3145 2673 894A
-
-  perm_rgb_1:   o0(RG).h v1(B) --> o2
-              0   1  2   3   4
-             gbrg|brgb|bbbb|bbbb
-             0100 1001 1111 1111
-             B5CD 6EF7 89AB CDEF
-
-  perm_rgb_2:   o3(RG).l o2(rgbB.l) --> o4*
-              0   1  2   3   4
-             gbrg|brgb|rgbr|gbrg
-             1111 1111 0010 0100
-             89AB CDEF 0182 3945
-
-  perm_rgb_2:   o3(RG).l o2(rgbB.l) ---> o5*
-              0   1  2   3   4
-             brgb|rgbr|gbrg|brgb
-             1001 0010 0100 1001
-             a67b 89cA BdCD eEFf
-
-*/
-static
-const vector unsigned char
-  perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
-                0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
-  perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
-                0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
-  perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
-                0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
-  perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
-                0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
-
-#define vec_merge3(x2,x1,x0,y0,y1,y2)       \
-do {                                        \
-    __typeof__(x0) o0,o2,o3;                \
-        o0 = vec_mergeh (x0,x1);            \
-        y0 = vec_perm (o0, x2, perm_rgb_0); \
-        o2 = vec_perm (o0, x2, perm_rgb_1); \
-        o3 = vec_mergel (x0,x1);            \
-        y1 = vec_perm (o3,o2,perm_rgb_2);   \
-        y2 = vec_perm (o3,o2,perm_rgb_3);   \
-} while(0)
-
-#define vec_mstbgr24(x0,x1,x2,ptr)      \
-do {                                    \
-    __typeof__(x0) _0,_1,_2;            \
-    vec_merge3 (x0,x1,x2,_0,_1,_2);     \
-    vec_st (_0, 0, ptr++);              \
-    vec_st (_1, 0, ptr++);              \
-    vec_st (_2, 0, ptr++);              \
-}  while (0);
-
-#define vec_mstrgb24(x0,x1,x2,ptr)      \
-do {                                    \
-    __typeof__(x0) _0,_1,_2;            \
-    vec_merge3 (x2,x1,x0,_0,_1,_2);     \
-    vec_st (_0, 0, ptr++);              \
-    vec_st (_1, 0, ptr++);              \
-    vec_st (_2, 0, ptr++);              \
-}  while (0);
-
-/* pack the pixels in rgb0 format
-   msb R
-   lsb 0
-*/
-#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                       \
-do {                                                                          \
-    T _0,_1,_2,_3;                                                            \
-    _0 = vec_mergeh (x0,x1);                                                  \
-    _1 = vec_mergeh (x2,x3);                                                  \
-    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
-    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
-    vec_st (_2, 0*16, (T *)ptr);                                              \
-    vec_st (_3, 1*16, (T *)ptr);                                              \
-    _0 = vec_mergel (x0,x1);                                                  \
-    _1 = vec_mergel (x2,x3);                                                  \
-    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
-    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
-    vec_st (_2, 2*16, (T *)ptr);                                              \
-    vec_st (_3, 3*16, (T *)ptr);                                              \
-    ptr += 4;                                                                 \
-}  while (0);
-
-/*
-
-  | 1     0       1.4021   | | Y |
-  | 1    -0.3441 -0.7142   |x| Cb|
-  | 1     1.7718  0        | | Cr|
-
-
-  Y:      [-128 127]
-  Cb/Cr : [-128 127]
-
-  typical yuv conversion work on Y: 0-255 this version has been optimized for jpeg decode.
-
-*/
-
-
-
-
-#define vec_unh(x) \
-    (vector signed short) \
-        vec_perm(x,(__typeof__(x)){0}, \
-                 ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
-                                         0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07}))
-#define vec_unl(x) \
-    (vector signed short) \
-        vec_perm(x,(__typeof__(x)){0}, \
-                 ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
-                                         0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F}))
-
-#define vec_clip_s16(x) \
-    vec_max (vec_min (x, ((vector signed short){235,235,235,235,235,235,235,235})), \
-                         ((vector signed short){ 16, 16, 16, 16, 16, 16, 16, 16}))
-
-#define vec_packclp(x,y) \
-    (vector unsigned char)vec_packs \
-        ((vector unsigned short)vec_max (x,((vector signed short) {0})), \
-         (vector unsigned short)vec_max (y,((vector signed short) {0})))
-
-//#define out_pixels(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,a,a,ptr)
-
-
-static inline void cvtyuvtoRGB (SwsContext *c,
-                                vector signed short Y, vector signed short U, vector signed short V,
-                                vector signed short *R, vector signed short *G, vector signed short *B)
-{
-    vector signed   short vx,ux,uvx;
-
-    Y = vec_mradds (Y, c->CY, c->OY);
-    U  = vec_sub (U,(vector signed short)
-                    vec_splat((vector signed short){128},0));
-    V  = vec_sub (V,(vector signed short)
-                    vec_splat((vector signed short){128},0));
-
-    //   ux  = (CBU*(u<<c->CSHIFT)+0x4000)>>15;
-    ux = vec_sl (U, c->CSHIFT);
-    *B = vec_mradds (ux, c->CBU, Y);
-
-    // vx  = (CRV*(v<<c->CSHIFT)+0x4000)>>15;
-    vx = vec_sl (V, c->CSHIFT);
-    *R = vec_mradds (vx, c->CRV, Y);
-
-    // uvx = ((CGU*u) + (CGV*v))>>15;
-    uvx = vec_mradds (U, c->CGU, Y);
-    *G  = vec_mradds (V, c->CGV, uvx);
-}
-
-
-/*
-  ------------------------------------------------------------------------------
-  CS converters
-  ------------------------------------------------------------------------------
-*/
-
-
-#define DEFCSP420_CVT(name,out_pixels)                                  \
-static int altivec_##name (SwsContext *c,                               \
-                           unsigned char **in, int *instrides,          \
-                           int srcSliceY,        int srcSliceH,         \
-                           unsigned char **oplanes, int *outstrides)    \
-{                                                                       \
-    int w = c->srcW;                                                    \
-    int h = srcSliceH;                                                  \
-    int i,j;                                                            \
-    int instrides_scl[3];                                               \
-    vector unsigned char y0,y1;                                         \
-                                                                        \
-    vector signed char  u,v;                                            \
-                                                                        \
-    vector signed short Y0,Y1,Y2,Y3;                                    \
-    vector signed short U,V;                                            \
-    vector signed short vx,ux,uvx;                                      \
-    vector signed short vx0,ux0,uvx0;                                   \
-    vector signed short vx1,ux1,uvx1;                                   \
-    vector signed short R0,G0,B0;                                       \
-    vector signed short R1,G1,B1;                                       \
-    vector unsigned char R,G,B;                                         \
-                                                                        \
-    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                  \
-    vector unsigned char align_perm;                                    \
-                                                                        \
-    vector signed short                                                 \
-        lCY  = c->CY,                                                   \
-        lOY  = c->OY,                                                   \
-        lCRV = c->CRV,                                                  \
-        lCBU = c->CBU,                                                  \
-        lCGU = c->CGU,                                                  \
-        lCGV = c->CGV;                                                  \
-                                                                        \
-    vector unsigned short lCSHIFT = c->CSHIFT;                          \
-                                                                        \
-    ubyte *y1i   = in[0];                                               \
-    ubyte *y2i   = in[0]+instrides[0];                                  \
-    ubyte *ui    = in[1];                                               \
-    ubyte *vi    = in[2];                                               \
-                                                                        \
-    vector unsigned char *oute                                          \
-        = (vector unsigned char *)                                      \
-            (oplanes[0]+srcSliceY*outstrides[0]);                       \
-    vector unsigned char *outo                                          \
-        = (vector unsigned char *)                                      \
-            (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);         \
-                                                                        \
-                                                                        \
-    instrides_scl[0] = instrides[0]*2-w;  /* the loop moves y{1,2}i by w */ \
-    instrides_scl[1] = instrides[1]-w/2;  /* the loop moves ui by w/2 */    \
-    instrides_scl[2] = instrides[2]-w/2;  /* the loop moves vi by w/2 */    \
-                                                                        \
-                                                                        \
-    for (i=0;i<h/2;i++) {                                               \
-        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);          \
-        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);          \
-                                                                        \
-        for (j=0;j<w/16;j++) {                                          \
-                                                                        \
-            y1ivP = (vector unsigned char *)y1i;                        \
-            y2ivP = (vector unsigned char *)y2i;                        \
-            uivP  = (vector unsigned char *)ui;                         \
-            vivP  = (vector unsigned char *)vi;                         \
-                                                                        \
-            align_perm = vec_lvsl (0, y1i);                             \
-            y0 = (vector unsigned char)                                 \
-                 vec_perm (y1ivP[0], y1ivP[1], align_perm);             \
-                                                                        \
-            align_perm = vec_lvsl (0, y2i);                             \
-            y1 = (vector unsigned char)                                 \
-                 vec_perm (y2ivP[0], y2ivP[1], align_perm);             \
-                                                                        \
-            align_perm = vec_lvsl (0, ui);                              \
-            u = (vector signed char)                                    \
-                vec_perm (uivP[0], uivP[1], align_perm);                \
-                                                                        \
-            align_perm = vec_lvsl (0, vi);                              \
-            v = (vector signed char)                                    \
-                vec_perm (vivP[0], vivP[1], align_perm);                \
-                                                                        \
-            u  = (vector signed char)                                   \
-                 vec_sub (u,(vector signed char)                        \
-                          vec_splat((vector signed char){128},0));      \
-            v  = (vector signed char)                                   \
-                 vec_sub (v,(vector signed char)                        \
-                          vec_splat((vector signed char){128},0));      \
-                                                                        \
-            U  = vec_unpackh (u);                                       \
-            V  = vec_unpackh (v);                                       \
-                                                                        \
-                                                                        \
-            Y0 = vec_unh (y0);                                          \
-            Y1 = vec_unl (y0);                                          \
-            Y2 = vec_unh (y1);                                          \
-            Y3 = vec_unl (y1);                                          \
-                                                                        \
-            Y0 = vec_mradds (Y0, lCY, lOY);                             \
-            Y1 = vec_mradds (Y1, lCY, lOY);                             \
-            Y2 = vec_mradds (Y2, lCY, lOY);                             \
-            Y3 = vec_mradds (Y3, lCY, lOY);                             \
-                                                                        \
-            /*   ux  = (CBU*(u<<CSHIFT)+0x4000)>>15 */                  \
-            ux = vec_sl (U, lCSHIFT);                                   \
-            ux = vec_mradds (ux, lCBU, (vector signed short){0});       \
-            ux0  = vec_mergeh (ux,ux);                                  \
-            ux1  = vec_mergel (ux,ux);                                  \
-                                                                        \
-            /* vx  = (CRV*(v<<CSHIFT)+0x4000)>>15;        */            \
-            vx = vec_sl (V, lCSHIFT);                                   \
-            vx = vec_mradds (vx, lCRV, (vector signed short){0});       \
-            vx0  = vec_mergeh (vx,vx);                                  \
-            vx1  = vec_mergel (vx,vx);                                  \
-                                                                        \
-            /* uvx = ((CGU*u) + (CGV*v))>>15 */                         \
-            uvx = vec_mradds (U, lCGU, (vector signed short){0});       \
-            uvx = vec_mradds (V, lCGV, uvx);                            \
-            uvx0 = vec_mergeh (uvx,uvx);                                \
-            uvx1 = vec_mergel (uvx,uvx);                                \
-                                                                        \
-            R0 = vec_add (Y0,vx0);                                      \
-            G0 = vec_add (Y0,uvx0);                                     \
-            B0 = vec_add (Y0,ux0);                                      \
-            R1 = vec_add (Y1,vx1);                                      \
-            G1 = vec_add (Y1,uvx1);                                     \
-            B1 = vec_add (Y1,ux1);                                      \
-                                                                        \
-            R  = vec_packclp (R0,R1);                                   \
-            G  = vec_packclp (G0,G1);                                   \
-            B  = vec_packclp (B0,B1);                                   \
-                                                                        \
-            out_pixels(R,G,B,oute);                                     \
-                                                                        \
-            R0 = vec_add (Y2,vx0);                                      \
-            G0 = vec_add (Y2,uvx0);                                     \
-            B0 = vec_add (Y2,ux0);                                      \
-            R1 = vec_add (Y3,vx1);                                      \
-            G1 = vec_add (Y3,uvx1);                                     \
-            B1 = vec_add (Y3,ux1);                                      \
-            R  = vec_packclp (R0,R1);                                   \
-            G  = vec_packclp (G0,G1);                                   \
-            B  = vec_packclp (B0,B1);                                   \
-                                                                        \
-                                                                        \
-            out_pixels(R,G,B,outo);                                     \
-                                                                        \
-            y1i  += 16;                                                 \
-            y2i  += 16;                                                 \
-            ui   += 8;                                                  \
-            vi   += 8;                                                  \
-                                                                        \
-        }                                                               \
-                                                                        \
-        outo  += (outstrides[0])>>4;                                    \
-        oute  += (outstrides[0])>>4;                                    \
-                                                                        \
-        ui    += instrides_scl[1];                                      \
-        vi    += instrides_scl[2];                                      \
-        y1i   += instrides_scl[0];                                      \
-        y2i   += instrides_scl[0];                                      \
-    }                                                                   \
-    return srcSliceH;                                                   \
-}
-
-
-#define out_abgr(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),c,b,a,ptr)
-#define out_bgra(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a)){255}),ptr)
-#define out_rgba(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a)){255}),ptr)
-#define out_argb(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,b,c,ptr)
-#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
-#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
-
-DEFCSP420_CVT (yuv2_abgr, out_abgr)
-#if 1
-DEFCSP420_CVT (yuv2_bgra, out_bgra)
-#else
-static int altivec_yuv2_bgra32 (SwsContext *c,
-                                unsigned char **in, int *instrides,
-                                int srcSliceY,        int srcSliceH,
-                                unsigned char **oplanes, int *outstrides)
-{
-    int w = c->srcW;
-    int h = srcSliceH;
-    int i,j;
-    int instrides_scl[3];
-    vector unsigned char y0,y1;
-
-    vector signed char  u,v;
-
-    vector signed short Y0,Y1,Y2,Y3;
-    vector signed short U,V;
-    vector signed short vx,ux,uvx;
-    vector signed short vx0,ux0,uvx0;
-    vector signed short vx1,ux1,uvx1;
-    vector signed short R0,G0,B0;
-    vector signed short R1,G1,B1;
-    vector unsigned char R,G,B;
-
-    vector unsigned char *uivP, *vivP;
-    vector unsigned char align_perm;
-
-    vector signed short
-        lCY  = c->CY,
-        lOY  = c->OY,
-        lCRV = c->CRV,
-        lCBU = c->CBU,
-        lCGU = c->CGU,
-        lCGV = c->CGV;
-
-    vector unsigned short lCSHIFT = c->CSHIFT;
-
-    ubyte *y1i   = in[0];
-    ubyte *y2i   = in[0]+w;
-    ubyte *ui    = in[1];
-    ubyte *vi    = in[2];
-
-    vector unsigned char *oute
-        = (vector unsigned char *)
-          (oplanes[0]+srcSliceY*outstrides[0]);
-    vector unsigned char *outo
-        = (vector unsigned char *)
-          (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
-
-
-    instrides_scl[0] = instrides[0];
-    instrides_scl[1] = instrides[1]-w/2;  /* the loop moves ui by w/2 */
-    instrides_scl[2] = instrides[2]-w/2;  /* the loop moves vi by w/2 */
-
-
-    for (i=0;i<h/2;i++) {
-        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
-        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
-
-        for (j=0;j<w/16;j++) {
-
-            y0 = vec_ldl (0,y1i);
-            y1 = vec_ldl (0,y2i);
-            uivP = (vector unsigned char *)ui;
-            vivP = (vector unsigned char *)vi;
-
-            align_perm = vec_lvsl (0, ui);
-            u  = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
-
-            align_perm = vec_lvsl (0, vi);
-            v  = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
-            u  = (vector signed char)
-                 vec_sub (u,(vector signed char)
-                          vec_splat((vector signed char){128},0));
-
-            v  = (vector signed char)
-                 vec_sub (v, (vector signed char)
-                          vec_splat((vector signed char){128},0));
-
-            U  = vec_unpackh (u);
-            V  = vec_unpackh (v);
-
-
-            Y0 = vec_unh (y0);
-            Y1 = vec_unl (y0);
-            Y2 = vec_unh (y1);
-            Y3 = vec_unl (y1);
-
-            Y0 = vec_mradds (Y0, lCY, lOY);
-            Y1 = vec_mradds (Y1, lCY, lOY);
-            Y2 = vec_mradds (Y2, lCY, lOY);
-            Y3 = vec_mradds (Y3, lCY, lOY);
-
-            /*   ux  = (CBU*(u<<CSHIFT)+0x4000)>>15 */
-            ux = vec_sl (U, lCSHIFT);
-            ux = vec_mradds (ux, lCBU, (vector signed short){0});
-            ux0  = vec_mergeh (ux,ux);
-            ux1  = vec_mergel (ux,ux);
-
-            /* vx  = (CRV*(v<<CSHIFT)+0x4000)>>15;        */
-            vx = vec_sl (V, lCSHIFT);
-            vx = vec_mradds (vx, lCRV, (vector signed short){0});
-            vx0  = vec_mergeh (vx,vx);
-            vx1  = vec_mergel (vx,vx);
-            /* uvx = ((CGU*u) + (CGV*v))>>15 */
-            uvx = vec_mradds (U, lCGU, (vector signed short){0});
-            uvx = vec_mradds (V, lCGV, uvx);
-            uvx0 = vec_mergeh (uvx,uvx);
-            uvx1 = vec_mergel (uvx,uvx);
-            R0 = vec_add (Y0,vx0);
-            G0 = vec_add (Y0,uvx0);
-            B0 = vec_add (Y0,ux0);
-            R1 = vec_add (Y1,vx1);
-            G1 = vec_add (Y1,uvx1);
-            B1 = vec_add (Y1,ux1);
-            R  = vec_packclp (R0,R1);
-            G  = vec_packclp (G0,G1);
-            B  = vec_packclp (B0,B1);
-
-            out_argb(R,G,B,oute);
-            R0 = vec_add (Y2,vx0);
-            G0 = vec_add (Y2,uvx0);
-            B0 = vec_add (Y2,ux0);
-            R1 = vec_add (Y3,vx1);
-            G1 = vec_add (Y3,uvx1);
-            B1 = vec_add (Y3,ux1);
-            R  = vec_packclp (R0,R1);
-            G  = vec_packclp (G0,G1);
-            B  = vec_packclp (B0,B1);
-
-            out_argb(R,G,B,outo);
-            y1i  += 16;
-            y2i  += 16;
-            ui   += 8;
-            vi   += 8;
-
-        }
-
-        outo  += (outstrides[0])>>4;
-        oute  += (outstrides[0])>>4;
-
-        ui    += instrides_scl[1];
-        vi    += instrides_scl[2];
-        y1i   += instrides_scl[0];
-        y2i   += instrides_scl[0];
-    }
-    return srcSliceH;
-}
-
-#endif
-
-
-DEFCSP420_CVT (yuv2_rgba, out_rgba)
-DEFCSP420_CVT (yuv2_argb, out_argb)
-DEFCSP420_CVT (yuv2_rgb24,  out_rgb24)
-DEFCSP420_CVT (yuv2_bgr24,  out_bgr24)
-
-
-// uyvy|uyvy|uyvy|uyvy
-// 0123 4567 89ab cdef
-static
-const vector unsigned char
-    demux_u = {0x10,0x00,0x10,0x00,
-               0x10,0x04,0x10,0x04,
-               0x10,0x08,0x10,0x08,
-               0x10,0x0c,0x10,0x0c},
-    demux_v = {0x10,0x02,0x10,0x02,
-               0x10,0x06,0x10,0x06,
-               0x10,0x0A,0x10,0x0A,
-               0x10,0x0E,0x10,0x0E},
-    demux_y = {0x10,0x01,0x10,0x03,
-               0x10,0x05,0x10,0x07,
-               0x10,0x09,0x10,0x0B,
-               0x10,0x0D,0x10,0x0F};
-
-/*
-  this is so I can play live CCIR raw video
-*/
-static int altivec_uyvy_rgb32 (SwsContext *c,
-                               unsigned char **in, int *instrides,
-                               int srcSliceY,        int srcSliceH,
-                               unsigned char **oplanes, int *outstrides)
-{
-    int w = c->srcW;
-    int h = srcSliceH;
-    int i,j;
-    vector unsigned char uyvy;
-    vector signed   short Y,U,V;
-    vector signed   short R0,G0,B0,R1,G1,B1;
-    vector unsigned char  R,G,B;
-    vector unsigned char *out;
-    ubyte *img;
-
-    img = in[0];
-    out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
-
-    for (i=0;i<h;i++) {
-        for (j=0;j<w/16;j++) {
-            uyvy = vec_ld (0, img);
-            U = (vector signed short)
-                vec_perm (uyvy, (vector unsigned char){0}, demux_u);
-
-            V = (vector signed short)
-                vec_perm (uyvy, (vector unsigned char){0}, demux_v);
-
-            Y = (vector signed short)
-                vec_perm (uyvy, (vector unsigned char){0}, demux_y);
-
-            cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
-
-            uyvy = vec_ld (16, img);
-            U = (vector signed short)
-                vec_perm (uyvy, (vector unsigned char){0}, demux_u);
-
-            V = (vector signed short)
-                vec_perm (uyvy, (vector unsigned char){0}, demux_v);
-
-            Y = (vector signed short)
-                vec_perm (uyvy, (vector unsigned char){0}, demux_y);
-
-            cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
-
-            R  = vec_packclp (R0,R1);
-            G  = vec_packclp (G0,G1);
-            B  = vec_packclp (B0,B1);
-
-            //      vec_mstbgr24 (R,G,B, out);
-            out_rgba (R,G,B,out);
-
-            img += 32;
-        }
-    }
-    return srcSliceH;
-}
-
-
-
-/* Ok currently the acceleration routine only supports
-   inputs of widths a multiple of 16
-   and heights a multiple 2
-
-   So we just fall back to the C codes for this.
-*/
-SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c)
-{
-    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
-        return NULL;
-
-    /*
-      and this seems not to matter too much I tried a bunch of
-      videos with abnormal widths and MPlayer crashes elsewhere.
-      mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
-      boom with X11 bad match.
-
-    */
-    if ((c->srcW & 0xf) != 0)    return NULL;
-
-    switch (c->srcFormat) {
-    case PIX_FMT_YUV410P:
-    case PIX_FMT_YUV420P:
-    /*case IMGFMT_CLPL:        ??? */
-    case PIX_FMT_GRAY8:
-    case PIX_FMT_NV12:
-    case PIX_FMT_NV21:
-        if ((c->srcH & 0x1) != 0)
-            return NULL;
-
-        switch(c->dstFormat){
-        case PIX_FMT_RGB24:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
-            return altivec_yuv2_rgb24;
-        case PIX_FMT_BGR24:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
-            return altivec_yuv2_bgr24;
-        case PIX_FMT_ARGB:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
-            return altivec_yuv2_argb;
-        case PIX_FMT_ABGR:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
-            return altivec_yuv2_abgr;
-        case PIX_FMT_RGBA:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
-            return altivec_yuv2_rgba;
-        case PIX_FMT_BGRA:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
-            return altivec_yuv2_bgra;
-        default: return NULL;
-        }
-        break;
-
-    case PIX_FMT_UYVY422:
-        switch(c->dstFormat){
-        case PIX_FMT_BGR32:
-            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
-            return altivec_uyvy_rgb32;
-        default: return NULL;
-        }
-        break;
-
-    }
-    return NULL;
-}
-
-void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
-{
-    union {
-        signed short tmp[8] __attribute__ ((aligned(16)));
-        vector signed short vec;
-    } buf;
-
-    buf.tmp[0] =  ((0xffffLL) * contrast>>8)>>9;                        //cy
-    buf.tmp[1] =  -256*brightness;                                      //oy
-    buf.tmp[2] =  (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);   //crv
-    buf.tmp[3] =  (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);   //cbu
-    buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));  //cgu
-    buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));  //cgv
-
-
-    c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
-    c->CY   = vec_splat ((vector signed short)buf.vec, 0);
-    c->OY   = vec_splat ((vector signed short)buf.vec, 1);
-    c->CRV  = vec_splat ((vector signed short)buf.vec, 2);
-    c->CBU  = vec_splat ((vector signed short)buf.vec, 3);
-    c->CGU  = vec_splat ((vector signed short)buf.vec, 4);
-    c->CGV  = vec_splat ((vector signed short)buf.vec, 5);
-#if 0
-    {
-    int i;
-    char *v[6]={"cy","oy","crv","cbu","cgu","cgv"};
-    for (i=0; i<6; i++)
-        printf("%s %d ", v[i],buf.tmp[i] );
-        printf("\n");
-    }
-#endif
-    return;
-}
-
-
-void
-altivec_yuv2packedX (SwsContext *c,
-                     int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
-                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                     uint8_t *dest, int dstW, int dstY)
-{
-    int i,j;
-    vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
-    vector signed short R0,G0,B0,R1,G1,B1;
-
-    vector unsigned char R,G,B;
-    vector unsigned char *out,*nout;
-
-    vector signed short   RND = vec_splat_s16(1<<3);
-    vector unsigned short SCL = vec_splat_u16(4);
-    unsigned long scratch[16] __attribute__ ((aligned (16)));
-
-    vector signed short *YCoeffs, *CCoeffs;
-
-    YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
-    CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
-
-    out = (vector unsigned char *)dest;
-
-    for (i=0; i<dstW; i+=16){
-        Y0 = RND;
-        Y1 = RND;
-        /* extract 16 coeffs from lumSrc */
-        for (j=0; j<lumFilterSize; j++) {
-            X0 = vec_ld (0,  &lumSrc[j][i]);
-            X1 = vec_ld (16, &lumSrc[j][i]);
-            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
-            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
-        }
-
-        U = RND;
-        V = RND;
-        /* extract 8 coeffs from U,V */
-        for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
-            U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
-            V  = vec_mradds (X, CCoeffs[j], V);
-        }
-
-        /* scale and clip signals */
-        Y0 = vec_sra (Y0, SCL);
-        Y1 = vec_sra (Y1, SCL);
-        U  = vec_sra (U,  SCL);
-        V  = vec_sra (V,  SCL);
-
-        Y0 = vec_clip_s16 (Y0);
-        Y1 = vec_clip_s16 (Y1);
-        U  = vec_clip_s16 (U);
-        V  = vec_clip_s16 (V);
-
-        /* now we have
-          Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
-          U= u0 u1 u2 u3 u4 u5 u6 u7      V= v0 v1 v2 v3 v4 v5 v6 v7
-
-          Y0= y0 y1 y2 y3 y4 y5 y6 y7    Y1= y8 y9 y10 y11 y12 y13 y14 y15
-          U0= u0 u0 u1 u1 u2 u2 u3 u3    U1= u4 u4 u5 u5 u6 u6 u7 u7
-          V0= v0 v0 v1 v1 v2 v2 v3 v3    V1= v4 v4 v5 v5 v6 v6 v7 v7
-        */
-
-        U0 = vec_mergeh (U,U);
-        V0 = vec_mergeh (V,V);
-
-        U1 = vec_mergel (U,U);
-        V1 = vec_mergel (V,V);
-
-        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
-        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
-
-        R  = vec_packclp (R0,R1);
-        G  = vec_packclp (G0,G1);
-        B  = vec_packclp (B0,B1);
-
-        switch(c->dstFormat) {
-            case PIX_FMT_ABGR:  out_abgr  (R,G,B,out); break;
-            case PIX_FMT_BGRA:  out_bgra  (R,G,B,out); break;
-            case PIX_FMT_RGBA:  out_rgba  (R,G,B,out); break;
-            case PIX_FMT_ARGB:  out_argb  (R,G,B,out); break;
-            case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
-            case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
-            default:
-            {
-                /* If this is reached, the caller should have called yuv2packedXinC
-                   instead. */
-                static int printed_error_message;
-                if (!printed_error_message) {
-                    av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
-                           sws_format_name(c->dstFormat));
-                    printed_error_message=1;
-                }
-                return;
-            }
-        }
-    }
-
-    if (i < dstW) {
-        i -= 16;
-
-        Y0 = RND;
-        Y1 = RND;
-        /* extract 16 coeffs from lumSrc */
-        for (j=0; j<lumFilterSize; j++) {
-            X0 = vec_ld (0,  &lumSrc[j][i]);
-            X1 = vec_ld (16, &lumSrc[j][i]);
-            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
-            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
-        }
-
-        U = RND;
-        V = RND;
-        /* extract 8 coeffs from U,V */
-        for (j=0; j<chrFilterSize; j++) {
-            X  = vec_ld (0, &chrSrc[j][i/2]);
-            U  = vec_mradds (X, CCoeffs[j], U);
-            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
-            V  = vec_mradds (X, CCoeffs[j], V);
-        }
-
-        /* scale and clip signals */
-        Y0 = vec_sra (Y0, SCL);
-        Y1 = vec_sra (Y1, SCL);
-        U  = vec_sra (U,  SCL);
-        V  = vec_sra (V,  SCL);
-
-        Y0 = vec_clip_s16 (Y0);
-        Y1 = vec_clip_s16 (Y1);
-        U  = vec_clip_s16 (U);
-        V  = vec_clip_s16 (V);
-
-        /* now we have
-           Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
-           U = u0 u1 u2 u3 u4 u5 u6 u7     V = v0 v1 v2 v3 v4 v5 v6 v7
-
-           Y0= y0 y1 y2 y3 y4 y5 y6 y7    Y1= y8 y9 y10 y11 y12 y13 y14 y15
-           U0= u0 u0 u1 u1 u2 u2 u3 u3    U1= u4 u4 u5 u5 u6 u6 u7 u7
-           V0= v0 v0 v1 v1 v2 v2 v3 v3    V1= v4 v4 v5 v5 v6 v6 v7 v7
-        */
-
-        U0 = vec_mergeh (U,U);
-        V0 = vec_mergeh (V,V);
-
-        U1 = vec_mergel (U,U);
-        V1 = vec_mergel (V,V);
-
-        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
-        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
-
-        R  = vec_packclp (R0,R1);
-        G  = vec_packclp (G0,G1);
-        B  = vec_packclp (B0,B1);
-
-        nout = (vector unsigned char *)scratch;
-        switch(c->dstFormat) {
-            case PIX_FMT_ABGR:  out_abgr  (R,G,B,nout); break;
-            case PIX_FMT_BGRA:  out_bgra  (R,G,B,nout); break;
-            case PIX_FMT_RGBA:  out_rgba  (R,G,B,nout); break;
-            case PIX_FMT_ARGB:  out_argb  (R,G,B,nout); break;
-            case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
-            case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
-            default:
-                /* Unreachable, I think. */
-                av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
-                       sws_format_name(c->dstFormat));
-                return;
-        }
-
-        memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
-    }
-
-}
diff --git a/libswscale/yuv2rgb_bfin.c b/libswscale/yuv2rgb_bfin.c
deleted file mode 100644
index 58cc5b6a35..0000000000
--- a/libswscale/yuv2rgb_bfin.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
- *
- * Blackfin video color space converter operations
- * convert I420 YV12 to RGB in various formats
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <assert.h>
-#include "config.h"
-#include <unistd.h>
-#include "rgb2rgb.h"
-#include "swscale.h"
-#include "swscale_internal.h"
-
-#ifdef __FDPIC__
-#define L1CODE __attribute__ ((l1_text))
-#else
-#define L1CODE
-#endif
-
-void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
-                              int w, uint32_t *coeffs) L1CODE;
-
-void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
-                              int w, uint32_t *coeffs) L1CODE;
-
-void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
-                             int w, uint32_t *coeffs) L1CODE;
-
-typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
-                            int w, uint32_t *coeffs);
-
-
-static void bfin_prepare_coefficients (SwsContext *c, int rgb, int masks)
-{
-    int oy;
-    oy      = c->yOffset&0xffff;
-    oy      = oy >> 3; // keep everything U8.0 for offset calculation
-
-    c->oc   = 128*0x01010101U;
-    c->oy   =  oy*0x01010101U;
-
-    /* copy 64bit vector coeffs down to 32bit vector coeffs */
-    c->cy  = c->yCoeff;
-    c->zero = 0;
-
-    if (rgb) {
-        c->crv = c->vrCoeff;
-        c->cbu = c->ubCoeff;
-        c->cgu = c->ugCoeff;
-        c->cgv = c->vgCoeff;
-    } else {
-        c->crv = c->ubCoeff;
-        c->cbu = c->vrCoeff;
-        c->cgu = c->vgCoeff;
-        c->cgv = c->ugCoeff;
-    }
-
-
-    if (masks == 555) {
-        c->rmask = 0x001f * 0x00010001U;
-        c->gmask = 0x03e0 * 0x00010001U;
-        c->bmask = 0x7c00 * 0x00010001U;
-    } else if (masks == 565) {
-        c->rmask = 0x001f * 0x00010001U;
-        c->gmask = 0x07e0 * 0x00010001U;
-        c->bmask = 0xf800 * 0x00010001U;
-    }
-}
-
-static int core_yuv420_rgb (SwsContext *c,
-                            uint8_t **in, int *instrides,
-                            int srcSliceY, int srcSliceH,
-                            uint8_t **oplanes, int *outstrides,
-                            ltransform lcscf, int rgb, int masks)
-{
-    uint8_t *py,*pu,*pv,*op;
-    int w  = instrides[0];
-    int h2 = srcSliceH>>1;
-    int i;
-
-    bfin_prepare_coefficients (c, rgb, masks);
-
-    py = in[0];
-    pu = in[1+(1^rgb)];
-    pv = in[1+(0^rgb)];
-
-    op = oplanes[0] + srcSliceY*outstrides[0];
-
-    for (i=0;i<h2;i++) {
-
-        lcscf (py, pu, pv, op, w, &c->oy);
-
-        py += instrides[0];
-        op += outstrides[0];
-
-        lcscf (py, pu, pv, op, w, &c->oy);
-
-        py += instrides[0];
-        pu += instrides[1];
-        pv += instrides[2];
-        op += outstrides[0];
-    }
-
-    return srcSliceH;
-}
-
-
-static int bfin_yuv420_rgb555 (SwsContext *c,
-                               uint8_t **in, int *instrides,
-                               int srcSliceY, int srcSliceH,
-                               uint8_t **oplanes, int *outstrides)
-{
-    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH, oplanes, outstrides,
-                            ff_bfin_yuv2rgb555_line, 1, 555);
-}
-
-static int bfin_yuv420_bgr555 (SwsContext *c,
-                               uint8_t **in, int *instrides,
-                               int srcSliceY, int srcSliceH,
-                               uint8_t **oplanes, int *outstrides)
-{
-    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH, oplanes, outstrides,
-                            ff_bfin_yuv2rgb555_line, 0, 555);
-}
-
-static int bfin_yuv420_rgb24 (SwsContext *c,
-                              uint8_t **in, int *instrides,
-                              int srcSliceY, int srcSliceH,
-                              uint8_t **oplanes, int *outstrides)
-{
-    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH, oplanes, outstrides,
-                            ff_bfin_yuv2rgb24_line, 1, 888);
-}
-
-static int bfin_yuv420_bgr24 (SwsContext *c,
-                              uint8_t **in, int *instrides,
-                              int srcSliceY, int srcSliceH,
-                              uint8_t **oplanes, int *outstrides)
-{
-    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH, oplanes, outstrides,
-                            ff_bfin_yuv2rgb24_line, 0, 888);
-}
-
-static int bfin_yuv420_rgb565 (SwsContext *c,
-                               uint8_t **in, int *instrides,
-                               int srcSliceY, int srcSliceH,
-                               uint8_t **oplanes, int *outstrides)
-{
-    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH, oplanes, outstrides,
-                            ff_bfin_yuv2rgb565_line, 1, 565);
-}
-
-static int bfin_yuv420_bgr565 (SwsContext *c,
-                               uint8_t **in, int *instrides,
-                               int srcSliceY, int srcSliceH,
-                               uint8_t **oplanes, int *outstrides)
-{
-    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH, oplanes, outstrides,
-                            ff_bfin_yuv2rgb565_line, 0, 565);
-}
-
-
-SwsFunc ff_bfin_yuv2rgb_get_func_ptr (SwsContext *c)
-{
-    SwsFunc f;
-
-    switch(c->dstFormat) {
-    case PIX_FMT_RGB555: f = bfin_yuv420_rgb555; break;
-    case PIX_FMT_BGR555: f = bfin_yuv420_bgr555; break;
-    case PIX_FMT_RGB565: f = bfin_yuv420_rgb565; break;
-    case PIX_FMT_BGR565: f = bfin_yuv420_bgr565; break;
-    case PIX_FMT_RGB24:  f = bfin_yuv420_rgb24;  break;
-    case PIX_FMT_BGR24:  f = bfin_yuv420_bgr24;  break;
-    default:
-        return 0;
-    }
-
-    av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
-           sws_format_name (c->dstFormat));
-
-    return f;
-}
diff --git a/libswscale/yuv2rgb_mlib.c b/libswscale/yuv2rgb_mlib.c
deleted file mode 100644
index 68247914e7..0000000000
--- a/libswscale/yuv2rgb_mlib.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * software YUV to RGB converter using mediaLib
- *
- * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_video.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "swscale.h"
-
-static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dst[], int dstStride[]){
-    if(c->srcFormat == PIX_FMT_YUV422P){
-        srcStride[1] *= 2;
-        srcStride[2] *= 2;
-    }
-
-    assert(srcStride[1] == srcStride[2]);
-
-    mlib_VideoColorYUV2ARGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
-                               srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
-    return srcSliceH;
-}
-
-static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dst[], int dstStride[]){
-    if(c->srcFormat == PIX_FMT_YUV422P){
-        srcStride[1] *= 2;
-        srcStride[2] *= 2;
-    }
-
-    assert(srcStride[1] == srcStride[2]);
-
-    mlib_VideoColorYUV2ABGR420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
-                               srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
-    return srcSliceH;
-}
-
-static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                              int srcSliceH, uint8_t* dst[], int dstStride[]){
-    if(c->srcFormat == PIX_FMT_YUV422P){
-        srcStride[1] *= 2;
-        srcStride[2] *= 2;
-    }
-
-    assert(srcStride[1] == srcStride[2]);
-
-    mlib_VideoColorYUV2RGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
-                              srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
-    return srcSliceH;
-}
-
-
-SwsFunc sws_yuv2rgb_init_mlib(SwsContext *c)
-{
-    switch(c->dstFormat){
-    case PIX_FMT_RGB24: return mlib_YUV2RGB420_24;
-    case PIX_FMT_BGR32: return mlib_YUV2ARGB420_32;
-    case PIX_FMT_RGB32: return mlib_YUV2ABGR420_32;
-    default: return NULL;
-    }
-}
-
diff --git a/libswscale/yuv2rgb_template.c b/libswscale/yuv2rgb_template.c
deleted file mode 100644
index f55568b0ab..0000000000
--- a/libswscale/yuv2rgb_template.c
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- * yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology"
- *
- * Copyright (C) 2000, Silicon Integrated System Corp
- *
- * Author: Olie Lho <ollie@sis.com.tw>
- *
- * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
- * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
- * context / deglobalize stuff by Michael Niedermayer
- *
- * This file is part of mpeg2dec, a free MPEG-2 video decoder
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with mpeg2dec; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#undef MOVNTQ
-#undef EMMS
-#undef SFENCE
-
-#if HAVE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#if HAVE_MMX2
-#define MOVNTQ "movntq"
-#define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE "/nop"
-#endif
-
-#define YUV2RGB \
-    /* Do the multiply part of the conversion for even and odd pixels,
-       register usage:
-       mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
-       mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
-       mm6 -> Y even, mm7 -> Y odd */\
-    /* convert the chroma part */\
-    "punpcklbw %%mm4, %%mm0;" /* scatter 4 Cb 00 u3 00 u2 00 u1 00 u0 */ \
-    "punpcklbw %%mm4, %%mm1;" /* scatter 4 Cr 00 v3 00 v2 00 v1 00 v0 */ \
-\
-    "psllw $3, %%mm0;" /* Promote precision */ \
-    "psllw $3, %%mm1;" /* Promote precision */ \
-\
-    "psubsw "U_OFFSET"(%4), %%mm0;" /* Cb -= 128 */ \
-    "psubsw "V_OFFSET"(%4), %%mm1;" /* Cr -= 128 */ \
-\
-    "movq %%mm0, %%mm2;" /* Copy 4 Cb 00 u3 00 u2 00 u1 00 u0 */ \
-    "movq %%mm1, %%mm3;" /* Copy 4 Cr 00 v3 00 v2 00 v1 00 v0 */ \
-\
-    "pmulhw "UG_COEFF"(%4), %%mm2;" /* Mul Cb with green coeff -> Cb green */ \
-    "pmulhw "VG_COEFF"(%4), %%mm3;" /* Mul Cr with green coeff -> Cr green */ \
-\
-    "pmulhw "UB_COEFF"(%4), %%mm0;" /* Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 */\
-    "pmulhw "VR_COEFF"(%4), %%mm1;" /* Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 */\
-\
-    "paddsw %%mm3, %%mm2;" /* Cb green + Cr green -> Cgreen */\
-\
-    /* convert the luma part */\
-    "movq %%mm6, %%mm7;" /* Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */\
-    "pand "MANGLE(mmx_00ffw)", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\
-\
-    "psrlw $8, %%mm7;" /* get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 */\
-\
-    "psllw $3, %%mm6;" /* Promote precision */\
-    "psllw $3, %%mm7;" /* Promote precision */\
-\
-    "psubw "Y_OFFSET"(%4), %%mm6;" /* Y -= 16 */\
-    "psubw "Y_OFFSET"(%4), %%mm7;" /* Y -= 16 */\
-\
-    "pmulhw "Y_COEFF"(%4), %%mm6;" /* Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 */\
-    "pmulhw "Y_COEFF"(%4), %%mm7;" /* Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 */\
-\
-    /* Do the addition part of the conversion for even and odd pixels,
-       register usage:
-       mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
-       mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
-       mm6 -> Y even, mm7 -> Y odd */\
-    "movq %%mm0, %%mm3;" /* Copy Cblue */\
-    "movq %%mm1, %%mm4;" /* Copy Cred */\
-    "movq %%mm2, %%mm5;" /* Copy Cgreen */\
-\
-    "paddsw %%mm6, %%mm0;" /* Y even + Cblue 00 B6 00 B4 00 B2 00 B0 */\
-    "paddsw %%mm7, %%mm3;" /* Y odd + Cblue 00 B7 00 B5 00 B3 00 B1 */\
-\
-    "paddsw %%mm6, %%mm1;" /* Y even + Cred 00 R6 00 R4 00 R2 00 R0 */\
-    "paddsw %%mm7, %%mm4;" /* Y odd + Cred 00 R7 00 R5 00 R3 00 R1 */\
-\
-    "paddsw %%mm6, %%mm2;" /* Y even + Cgreen 00 G6 00 G4 00 G2 00 G0 */\
-    "paddsw %%mm7, %%mm5;" /* Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1 */\
-\
-    /* Limit RGB even to 0..255 */\
-    "packuswb %%mm0, %%mm0;" /* B6 B4 B2 B0  B6 B4 B2 B0 */\
-    "packuswb %%mm1, %%mm1;" /* R6 R4 R2 R0  R6 R4 R2 R0 */\
-    "packuswb %%mm2, %%mm2;" /* G6 G4 G2 G0  G6 G4 G2 G0 */\
-\
-    /* Limit RGB odd to 0..255 */\
-    "packuswb %%mm3, %%mm3;" /* B7 B5 B3 B1  B7 B5 B3 B1 */\
-    "packuswb %%mm4, %%mm4;" /* R7 R5 R3 R1  R7 R5 R3 R1 */\
-    "packuswb %%mm5, %%mm5;" /* G7 G5 G3 G1  G7 G5 G3 G1 */\
-\
-    /* Interleave RGB even and odd */\
-    "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\
-    "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\
-    "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
-
-
-#define YUV422_UNSHIFT                   \
-    if(c->srcFormat == PIX_FMT_YUV422P){ \
-        srcStride[1] *= 2;               \
-        srcStride[2] *= 2;               \
-    }                                    \
-
-#define YUV2RGB_LOOP(depth)                                   \
-    h_size= (c->dstW+7)&~7;                                   \
-    if(h_size*depth > FFABS(dstStride[0])) h_size-=8;         \
-\
-    __asm__ volatile ("pxor %mm4, %mm4;" /* zero mm4 */ );    \
-    for (y= 0; y<srcSliceH; y++ ) {                           \
-        uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0]; \
-        uint8_t *py = src[0] + y*srcStride[0];                \
-        uint8_t *pu = src[1] + (y>>1)*srcStride[1];           \
-        uint8_t *pv = src[2] + (y>>1)*srcStride[2];           \
-        long index= -h_size/2;                                \
-
-#define YUV2RGB_INIT                                                       \
-        /* This MMX assembly code deals with a SINGLE scan line at a time, \
-         * it converts 8 pixels in each iteration. */                      \
-        __asm__ volatile (                                                 \
-        /* load data for start of next scan line */                        \
-        "movd    (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \
-        "movd    (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \
-        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
-        /*                                                                 \
-        ".balign 16     \n\t"                                              \
-        */                                                                 \
-        "1:             \n\t"                                              \
-        /* No speed difference on my p3@500 with prefetch,                 \
-         * if it is faster for anyone with -benchmark then tell me.        \
-        PREFETCH" 64(%0) \n\t"                                             \
-        PREFETCH" 64(%1) \n\t"                                             \
-        PREFETCH" 64(%2) \n\t"                                             \
-        */                                                                 \
-
-#define YUV2RGB_ENDLOOP(depth) \
-        "add $"AV_STRINGIFY(depth*8)", %1    \n\t" \
-        "add                       $4, %0    \n\t" \
-        " js                       1b        \n\t" \
-\
-        : "+r" (index), "+r" (image) \
-        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) \
-        ); \
-    } \
-    __asm__ volatile (EMMS); \
-    return srcSliceH; \
-
-static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
-    int y, h_size;
-
-    YUV422_UNSHIFT
-    YUV2RGB_LOOP(2)
-
-        c->blueDither= ff_dither8[y&1];
-        c->greenDither= ff_dither4[y&1];
-        c->redDither= ff_dither8[(y+1)&1];
-
-        YUV2RGB_INIT
-        YUV2RGB
-
-#ifdef DITHER1XBPP
-        "paddusb "BLUE_DITHER"(%4), %%mm0;"
-        "paddusb "GREEN_DITHER"(%4), %%mm2;"
-        "paddusb "RED_DITHER"(%4), %%mm1;"
-#endif
-        /* mask unneeded bits off */
-        "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
-        "pand "MANGLE(mmx_grnmask)", %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
-        "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
-
-        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
-        "pxor %%mm4, %%mm4;" /* zero mm4 */
-
-        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
-        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
-
-        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
-        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
-        "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
-
-        "psllw  $3, %%mm2;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */
-        "por %%mm2, %%mm0;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */
-
-        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-        MOVNTQ "      %%mm0, (%1);" /* store pixel 0-3 */
-
-        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
-        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
-        "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
-
-        "psllw        $3, %%mm7;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */
-        "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-
-        "por       %%mm7, %%mm5;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */
-        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-
-        MOVNTQ "   %%mm5, 8 (%1);" /* store pixel 4-7 */
-
-    YUV2RGB_ENDLOOP(2)
-}
-
-static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
-    int y, h_size;
-
-    YUV422_UNSHIFT
-    YUV2RGB_LOOP(2)
-
-        c->blueDither= ff_dither8[y&1];
-        c->greenDither= ff_dither8[y&1];
-        c->redDither= ff_dither8[(y+1)&1];
-
-        YUV2RGB_INIT
-        YUV2RGB
-
-#ifdef DITHER1XBPP
-        "paddusb "BLUE_DITHER"(%4), %%mm0  \n\t"
-        "paddusb "GREEN_DITHER"(%4), %%mm2  \n\t"
-        "paddusb "RED_DITHER"(%4), %%mm1  \n\t"
-#endif
-
-        /* mask unneeded bits off */
-        "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
-        "pand "MANGLE(mmx_redmask)", %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
-        "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
-
-        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
-        "psrlw   $1, %%mm1;" /* 0_r7r6r5  r4r3_0_0 0_r7r6r5 r4r3_0_0 */
-        "pxor %%mm4, %%mm4;" /* zero mm4 */
-
-        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
-        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
-
-        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
-        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
-        "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
-
-        "psllw  $2, %%mm2;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */
-        "por %%mm2, %%mm0;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */
-
-        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-        MOVNTQ "      %%mm0, (%1);"  /* store pixel 0-3 */
-
-        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
-        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 0_g7g6g5 g4g3_0_0 */
-        "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
-
-        "psllw        $2, %%mm7;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */
-        "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-
-        "por       %%mm7, %%mm5;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */
-        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-
-        MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
-
-    YUV2RGB_ENDLOOP(2)
-}
-
-static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
-    int y, h_size;
-
-    YUV422_UNSHIFT
-    YUV2RGB_LOOP(3)
-
-        YUV2RGB_INIT
-        YUV2RGB
-        /* mm0=B, %%mm2=G, %%mm1=R */
-#if HAVE_MMX2
-        "movq "MANGLE(ff_M24A)", %%mm4     \n\t"
-        "movq "MANGLE(ff_M24C)", %%mm7     \n\t"
-        "pshufw $0x50, %%mm0, %%mm5     \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */
-        "pshufw $0x50, %%mm2, %%mm3     \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */
-        "pshufw $0x00, %%mm1, %%mm6     \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */
-
-        "pand   %%mm4, %%mm5            \n\t" /*    B2        B1       B0 */
-        "pand   %%mm4, %%mm3            \n\t" /*    G2        G1       G0 */
-        "pand   %%mm7, %%mm6            \n\t" /*       R1        R0       */
-
-        "psllq     $8, %%mm3            \n\t" /* G2        G1       G0    */
-        "por    %%mm5, %%mm6            \n\t"
-        "por    %%mm3, %%mm6            \n\t"
-        MOVNTQ" %%mm6, (%1)             \n\t"
-
-        "psrlq     $8, %%mm2            \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */
-        "pshufw $0xA5, %%mm0, %%mm5     \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */
-        "pshufw $0x55, %%mm2, %%mm3     \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */
-        "pshufw $0xA5, %%mm1, %%mm6     \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */
-
-        "pand "MANGLE(ff_M24B)", %%mm5     \n\t" /* B5       B4        B3    */
-        "pand          %%mm7, %%mm3     \n\t" /*       G4        G3       */
-        "pand          %%mm4, %%mm6     \n\t" /*    R4        R3       R2 */
-
-        "por    %%mm5, %%mm3            \n\t" /* B5    G4 B4     G3 B3    */
-        "por    %%mm3, %%mm6            \n\t"
-        MOVNTQ" %%mm6, 8(%1)            \n\t"
-
-        "pshufw $0xFF, %%mm0, %%mm5     \n\t" /* B7 B6 B7 B6  B7 B6 B6 B7 */
-        "pshufw $0xFA, %%mm2, %%mm3     \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */
-        "pshufw $0xFA, %%mm1, %%mm6     \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */
-        "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-
-        "pand          %%mm7, %%mm5     \n\t" /*       B7        B6       */
-        "pand          %%mm4, %%mm3     \n\t" /*    G7        G6       G5 */
-        "pand "MANGLE(ff_M24B)", %%mm6     \n\t" /* R7       R6        R5    */
-        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-\
-        "por          %%mm5, %%mm3      \n\t"
-        "por          %%mm3, %%mm6      \n\t"
-        MOVNTQ"       %%mm6, 16(%1)     \n\t"
-        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-        "pxor         %%mm4, %%mm4      \n\t"
-
-#else
-
-        "pxor      %%mm4, %%mm4     \n\t"
-        "movq      %%mm0, %%mm5     \n\t" /* B */
-        "movq      %%mm1, %%mm6     \n\t" /* R */
-        "punpcklbw %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */
-        "punpcklbw %%mm4, %%mm1     \n\t" /* 0R0R0R0R 0 */
-        "punpckhbw %%mm2, %%mm5     \n\t" /* GBGBGBGB 2 */
-        "punpckhbw %%mm4, %%mm6     \n\t" /* 0R0R0R0R 2 */
-        "movq      %%mm0, %%mm7     \n\t" /* GBGBGBGB 0 */
-        "movq      %%mm5, %%mm3     \n\t" /* GBGBGBGB 2 */
-        "punpcklwd %%mm1, %%mm7     \n\t" /* 0RGB0RGB 0 */
-        "punpckhwd %%mm1, %%mm0     \n\t" /* 0RGB0RGB 1 */
-        "punpcklwd %%mm6, %%mm5     \n\t" /* 0RGB0RGB 2 */
-        "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */
-
-        "movq      %%mm7, %%mm2     \n\t" /* 0RGB0RGB 0 */
-        "movq      %%mm0, %%mm6     \n\t" /* 0RGB0RGB 1 */
-        "movq      %%mm5, %%mm1     \n\t" /* 0RGB0RGB 2 */
-        "movq      %%mm3, %%mm4     \n\t" /* 0RGB0RGB 3 */
-
-        "psllq       $40, %%mm7     \n\t" /* RGB00000 0 */
-        "psllq       $40, %%mm0     \n\t" /* RGB00000 1 */
-        "psllq       $40, %%mm5     \n\t" /* RGB00000 2 */
-        "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */
-
-        "punpckhdq %%mm2, %%mm7     \n\t" /* 0RGBRGB0 0 */
-        "punpckhdq %%mm6, %%mm0     \n\t" /* 0RGBRGB0 1 */
-        "punpckhdq %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */
-        "punpckhdq %%mm4, %%mm3     \n\t" /* 0RGBRGB0 3 */
-
-        "psrlq        $8, %%mm7     \n\t" /* 00RGBRGB 0 */
-        "movq      %%mm0, %%mm6     \n\t" /* 0RGBRGB0 1 */
-        "psllq       $40, %%mm0     \n\t" /* GB000000 1 */
-        "por       %%mm0, %%mm7     \n\t" /* GBRGBRGB 0 */
-        MOVNTQ"    %%mm7, (%1)      \n\t"
-
-        "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-
-        "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */
-        "movq      %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */
-        "psllq       $24, %%mm5     \n\t" /* BRGB0000 2 */
-        "por       %%mm5, %%mm6     \n\t" /* BRGBRGBR 1 */
-        MOVNTQ"    %%mm6, 8(%1)     \n\t"
-
-        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-
-        "psrlq       $40, %%mm1     \n\t" /* 000000RG 2 */
-        "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */
-        "por       %%mm3, %%mm1     \n\t" /* RGBRGBRG 2 */
-        MOVNTQ"    %%mm1, 16(%1)    \n\t"
-
-        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-        "pxor      %%mm4, %%mm4     \n\t"
-#endif
-
-    YUV2RGB_ENDLOOP(3)
-}
-
-#define RGB_PLANAR2PACKED32                                             \
-    /* convert RGB plane to RGB packed format,                          \
-       mm0 ->  B, mm1 -> R, mm2 -> G, mm3 -> A,                         \
-       mm4 -> GB, mm5 -> AR pixel 4-7,                                  \
-       mm6 -> GB, mm7 -> AR pixel 0-3 */                                \
-    "movq      %%mm0, %%mm6;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
-    "movq      %%mm1, %%mm7;"   /* R7 R6 R5 R4 R3 R2 R1 R0 */           \
-\
-    "movq      %%mm0, %%mm4;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
-    "movq      %%mm1, %%mm5;"   /* R7 R6 R5 R4 R3 R2 R1 R0 */           \
-\
-    "punpcklbw %%mm2, %%mm6;"   /* G3 B3 G2 B2 G1 B1 G0 B0 */           \
-    "punpcklbw %%mm3, %%mm7;"   /* A3 R3 A2 R2 A1 R1 A0 R0 */           \
-\
-    "punpcklwd %%mm7, %%mm6;"   /* A1 R1 B1 G1 A0 R0 B0 G0 */           \
-    MOVNTQ "   %%mm6, (%1);"    /* Store ARGB1 ARGB0 */                 \
-\
-    "movq      %%mm0, %%mm6;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
-    "punpcklbw %%mm2, %%mm6;"   /* G3 B3 G2 B2 G1 B1 G0 B0 */           \
-\
-    "punpckhwd %%mm7, %%mm6;"   /* A3 R3 G3 B3 A2 R2 B3 G2 */           \
-    MOVNTQ "   %%mm6, 8 (%1);"  /* Store ARGB3 ARGB2 */                 \
-\
-    "punpckhbw %%mm2, %%mm4;"   /* G7 B7 G6 B6 G5 B5 G4 B4 */           \
-    "punpckhbw %%mm3, %%mm5;"   /* A7 R7 A6 R6 A5 R5 A4 R4 */           \
-\
-    "punpcklwd %%mm5, %%mm4;"   /* A5 R5 B5 G5 A4 R4 B4 G4 */           \
-    MOVNTQ "   %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */                 \
-\
-    "movq      %%mm0, %%mm4;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
-    "punpckhbw %%mm2, %%mm4;"   /* G7 B7 G6 B6 G5 B5 G4 B4 */           \
-\
-    "punpckhwd %%mm5, %%mm4;"   /* A7 R7 G7 B7 A6 R6 B6 G6 */           \
-    MOVNTQ "   %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */                 \
-\
-    "movd 4 (%2, %0), %%mm0;"   /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \
-    "movd 4 (%3, %0), %%mm1;"   /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \
-\
-    "pxor         %%mm4, %%mm4;" /* zero mm4 */                         \
-    "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
-
-static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
-    int y, h_size;
-
-    YUV422_UNSHIFT
-    YUV2RGB_LOOP(4)
-
-        YUV2RGB_INIT
-        YUV2RGB
-        "pcmpeqd   %%mm3, %%mm3;"   /* fill mm3 */
-        RGB_PLANAR2PACKED32
-
-    YUV2RGB_ENDLOOP(4)
-}
diff --git a/libswscale/yuv2rgb_vis.c b/libswscale/yuv2rgb_vis.c
deleted file mode 100644
index 2e2737aa9f..0000000000
--- a/libswscale/yuv2rgb_vis.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * VIS optimized software YUV to RGB converter
- * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-#include <stdlib.h>
-
-#include "swscale.h"
-#include "swscale_internal.h"
-
-#define YUV2RGB_INIT \
-    "wr %%g0, 0x10, %%gsr \n\t" \
-    "ldd [%5], %%f32      \n\t" \
-    "ldd [%5+8], %%f34    \n\t" \
-    "ldd [%5+16], %%f36   \n\t" \
-    "ldd [%5+24], %%f38   \n\t" \
-    "ldd [%5+32], %%f40   \n\t" \
-    "ldd [%5+40], %%f42   \n\t" \
-    "ldd [%5+48], %%f44   \n\t" \
-    "ldd [%5+56], %%f46   \n\t" \
-    "ldd [%5+64], %%f48   \n\t" \
-    "ldd [%5+72], %%f50   \n\t"
-
-#define YUV2RGB_KERNEL \
-    /* ^^^^ f0=Y f3=u f5=v */ \
-    "fmul8x16 %%f3, %%f48, %%f6   \n\t" \
-    "fmul8x16 %%f19, %%f48, %%f22 \n\t" \
-    "fmul8x16 %%f5, %%f44, %%f8   \n\t" \
-    "fmul8x16 %%f21, %%f44, %%f24 \n\t" \
-    "fmul8x16 %%f0, %%f42, %%f0   \n\t" \
-    "fmul8x16 %%f16, %%f42, %%f16 \n\t" \
-    "fmul8x16 %%f3, %%f50, %%f2   \n\t" \
-    "fmul8x16 %%f19, %%f50, %%f18 \n\t" \
-    "fmul8x16 %%f5, %%f46, %%f4   \n\t" \
-    "fmul8x16 %%f21, %%f46, %%f20 \n\t" \
-    \
-    "fpsub16 %%f6, %%f34, %%f6   \n\t" /* 1 */ \
-    "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */ \
-    "fpsub16 %%f8, %%f38, %%f8   \n\t" /* 3 */ \
-    "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */ \
-    "fpsub16 %%f0, %%f32, %%f0   \n\t" /* 0 */ \
-    "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */ \
-    "fpsub16 %%f2, %%f36, %%f2   \n\t" /* 2 */ \
-    "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */ \
-    "fpsub16 %%f4, %%f40, %%f4   \n\t" /* 4 */ \
-    "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */ \
-    \
-    "fpadd16 %%f0, %%f8, %%f8    \n\t" /* Gt */ \
-    "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */ \
-    "fpadd16 %%f0, %%f4, %%f4    \n\t" /* R */ \
-    "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */ \
-    "fpadd16 %%f0, %%f6, %%f6    \n\t" /* B */ \
-    "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */ \
-    "fpadd16 %%f8, %%f2, %%f2    \n\t" /* G */ \
-    "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */ \
-    \
-    "fpack16 %%f4, %%f4    \n\t" \
-    "fpack16 %%f20, %%f20  \n\t" \
-    "fpack16 %%f6, %%f6    \n\t" \
-    "fpack16 %%f22, %%f22  \n\t" \
-    "fpack16 %%f2, %%f2    \n\t" \
-    "fpack16 %%f18, %%f18  \n\t"
-
-
-
-// FIXME: must be changed to set alpha to 255 instead of 0
-static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[]){
-  int y, out1, out2, out3, out4, out5, out6;
-
-  for(y=0;y < srcSliceH;++y) {
-      __asm__ volatile (
-          YUV2RGB_INIT
-          "wr %%g0, 0xd2, %%asi        \n\t" /* ASI_FL16_P */
-          "1:                          \n\t"
-          "ldda [%1] %%asi, %%f2       \n\t"
-          "ldda [%1+2] %%asi, %%f18    \n\t"
-          "ldda [%2] %%asi, %%f4       \n\t"
-          "ldda [%2+2] %%asi, %%f20    \n\t"
-          "ld [%0], %%f0               \n\t"
-          "ld [%0+4], %%f16            \n\t"
-          "fpmerge %%f3, %%f3, %%f2    \n\t"
-          "fpmerge %%f19, %%f19, %%f18 \n\t"
-          "fpmerge %%f5, %%f5, %%f4    \n\t"
-          "fpmerge %%f21, %%f21, %%f20 \n\t"
-          YUV2RGB_KERNEL
-          "fzero %%f0                  \n\t"
-          "fpmerge %%f4, %%f6, %%f8    \n\t"  // r,b,t1
-          "fpmerge %%f20, %%f22, %%f24 \n\t"  // r,b,t1
-          "fpmerge %%f0, %%f2, %%f10   \n\t"  // 0,g,t2
-          "fpmerge %%f0, %%f18, %%f26  \n\t"  // 0,g,t2
-          "fpmerge %%f10, %%f8, %%f4   \n\t"  // t2,t1,msb
-          "fpmerge %%f26, %%f24, %%f20 \n\t"  // t2,t1,msb
-          "fpmerge %%f11, %%f9, %%f6   \n\t"  // t2,t1,lsb
-          "fpmerge %%f27, %%f25, %%f22 \n\t"  // t2,t1,lsb
-          "std %%f4, [%3]              \n\t"
-          "std %%f20, [%3+16]          \n\t"
-          "std %%f6, [%3+8]            \n\t"
-          "std %%f22, [%3+24]          \n\t"
-
-          "add %0, 8, %0   \n\t"
-          "add %1, 4, %1   \n\t"
-          "add %2, 4, %2   \n\t"
-          "subcc %4, 8, %4 \n\t"
-          "bne 1b          \n\t"
-          "add %3, 32, %3  \n\t" //delay slot
-          : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6)
-          : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]),
-            "2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]),
-            "4" (c->dstW),
-            "5" (c->sparc_coeffs)
-      );
-  }
-
-  return srcSliceH;
-}
-
-// FIXME: must be changed to set alpha to 255 instead of 0
-static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                           int srcSliceH, uint8_t* dst[], int dstStride[]){
-  int y, out1, out2, out3, out4, out5, out6;
-
-  for(y=0;y < srcSliceH;++y) {
-      __asm__ volatile (
-          YUV2RGB_INIT
-          "wr %%g0, 0xd2, %%asi        \n\t" /* ASI_FL16_P */
-          "1:                          \n\t"
-          "ldda [%1] %%asi, %%f2       \n\t"
-          "ldda [%1+2] %%asi, %%f18    \n\t"
-          "ldda [%2] %%asi, %%f4       \n\t"
-          "ldda [%2+2] %%asi, %%f20    \n\t"
-          "ld [%0], %%f0               \n\t"
-          "ld [%0+4], %%f16            \n\t"
-          "fpmerge %%f3, %%f3, %%f2    \n\t"
-          "fpmerge %%f19, %%f19, %%f18 \n\t"
-          "fpmerge %%f5, %%f5, %%f4    \n\t"
-          "fpmerge %%f21, %%f21, %%f20 \n\t"
-          YUV2RGB_KERNEL
-          "fzero %%f0 \n\t"
-          "fpmerge %%f4, %%f6, %%f8    \n\t"  // r,b,t1
-          "fpmerge %%f20, %%f22, %%f24 \n\t"  // r,b,t1
-          "fpmerge %%f0, %%f2, %%f10   \n\t"  // 0,g,t2
-          "fpmerge %%f0, %%f18, %%f26  \n\t"  // 0,g,t2
-          "fpmerge %%f10, %%f8, %%f4   \n\t"  // t2,t1,msb
-          "fpmerge %%f26, %%f24, %%f20 \n\t"  // t2,t1,msb
-          "fpmerge %%f11, %%f9, %%f6   \n\t"  // t2,t1,lsb
-          "fpmerge %%f27, %%f25, %%f22 \n\t"  // t2,t1,lsb
-          "std %%f4, [%3]              \n\t"
-          "std %%f20, [%3+16]          \n\t"
-          "std %%f6, [%3+8]            \n\t"
-          "std %%f22, [%3+24]          \n\t"
-
-          "add %0, 8, %0   \n\t"
-          "add %1, 4, %1   \n\t"
-          "add %2, 4, %2   \n\t"
-          "subcc %4, 8, %4 \n\t"
-          "bne 1b          \n\t"
-          "add %3, 32, %3  \n\t" //delay slot
-          : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6)
-          : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]),
-            "2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]),
-            "4" (c->dstW),
-            "5" (c->sparc_coeffs)
-      );
-  }
-
-  return srcSliceH;
-}
-
-SwsFunc sws_yuv2rgb_init_vis(SwsContext *c) {
-    c->sparc_coeffs[5]=c->yCoeff;
-    c->sparc_coeffs[6]=c->vgCoeff;
-    c->sparc_coeffs[7]=c->vrCoeff;
-    c->sparc_coeffs[8]=c->ubCoeff;
-    c->sparc_coeffs[9]=c->ugCoeff;
-
-    c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
-    c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
-
-    if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) {
-        av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n");
-        return vis_422P_ARGB32;
-    }
-    else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) {
-        av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n");
-        return vis_420P_ARGB32;
-    }
-    return NULL;
-}

From 07679e680c01a6a96b66ed885cc4c8ddd906b124 Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Mon, 2 Mar 2009 20:32:24 +0000
Subject: [PATCH 002/315] revert r16717, r16718, r16719, EAGAIN handling, this
 causes FFserver to hang

Originally committed as revision 17737 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 ffmpeg.c             |  5 +----
 libavformat/flvdec.c |  7 ++++---
 libavformat/utils.c  | 15 +++------------
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 8ec1f471a1..05699bc9bb 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2102,10 +2102,7 @@ static int av_encode(AVFormatContext **output_files,
 
         /* read a frame from it and output it in the fifo */
         is = input_files[file_index];
-        ret= av_read_frame(is, &pkt);
-        if(ret == AVERROR(EAGAIN) && strcmp(is->iformat->name, "ffm"))
-            continue;
-        if (ret < 0) {
+        if (av_read_frame(is, &pkt) < 0) {
             file_table[file_index].eof_reached = 1;
             if (opt_shortest)
                 break;
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index c925e54539..9bc2517b15 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -313,6 +313,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
     int64_t dts, pts = AV_NOPTS_VALUE;
     AVStream *st = NULL;
 
+ retry:
  for(;;){
     pos = url_ftell(s->pb);
     url_fskip(s->pb, 4); /* size of previous packet */
@@ -348,7 +349,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
             av_log(s, AV_LOG_ERROR, "skipping flv packet: type %d, size %d, flags %d\n", type, size, flags);
     skip:
         url_fseek(s->pb, next, SEEK_SET);
-        return AVERROR(EAGAIN);
+        continue;
     }
 
     /* skip empty data packets */
@@ -372,7 +373,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
        || st->discard >= AVDISCARD_ALL
        ){
         url_fseek(s->pb, next, SEEK_SET);
-        return AVERROR(EAGAIN);
+        continue;
     }
     if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY)
         av_add_index_entry(st, pos, dts, size, 0, AVINDEX_KEYFRAME);
@@ -435,7 +436,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
                         st->codec->channels, st->codec->sample_rate);
             }
 
-            return AVERROR(EAGAIN);
+            goto retry;
         }
     }
 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 900bd206e1..0ffe96a00b 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1515,10 +1515,7 @@ static int av_seek_frame_generic(AVFormatContext *s,
                 return ret;
         }
         for(i=0;; i++) {
-            int ret;
-            do{
-                ret = av_read_frame(s, &pkt);
-            }while(ret == AVERROR(EAGAIN));
+            int ret = av_read_frame(s, &pkt);
             if(ret<0)
                 break;
             av_free_packet(&pkt);
@@ -1741,9 +1738,7 @@ static void av_estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset
         if (i == ic->nb_streams)
             break;
 
-        do{
-            ret = av_read_packet(ic, pkt);
-        }while(ret == AVERROR(EAGAIN));
+        ret = av_read_packet(ic, pkt);
         if (ret != 0)
             break;
         read_size += pkt->size;
@@ -1768,9 +1763,7 @@ static void av_estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset
         if (read_size >= DURATION_MAX_READ_SIZE)
             break;
 
-        do{
-            ret = av_read_packet(ic, pkt);
-        }while(ret == AVERROR(EAGAIN));
+        ret = av_read_packet(ic, pkt);
         if (ret != 0)
             break;
         read_size += pkt->size;
@@ -2079,8 +2072,6 @@ int av_find_stream_info(AVFormatContext *ic)
         /* NOTE: a new stream can be added there if no header in file
            (AVFMTCTX_NOHEADER) */
         ret = av_read_frame_internal(ic, &pkt1);
-        if(ret == AVERROR(EAGAIN))
-            continue;
         if (ret < 0) {
             /* EOF or error */
             ret = -1; /* we could not have all the codec parameters before EOF */

From 3aafe8248560bcc952ccc86bd44b220d10c4d879 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 3 Mar 2009 12:47:47 +0000
Subject: [PATCH 003/315] Output 0.5 as version string.

Originally committed as revision 17754 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 version.sh | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/version.sh b/version.sh
index adaa46aa91..fce5d0da7d 100755
--- a/version.sh
+++ b/version.sh
@@ -1,20 +1,6 @@
 #!/bin/sh
 
-# check for SVN revision number
-revision=$(cat snapshot_version 2> /dev/null)
-test $revision || revision=$(cd "$1" && LC_ALL=C svn info 2> /dev/null | grep Revision | cut -d' ' -f2)
-test $revision || revision=$(cd "$1" && grep revision .svn/entries 2>/dev/null | cut -d '"' -f2)
-test $revision || revision=$(cd "$1" && sed -n -e '/^dir$/{n;p;q}' .svn/entries 2>/dev/null)
-test $revision && revision=SVN-r$revision
-
-# check for git short hash
-if ! test $revision; then
-    revision=$(cd "$1" && git log -1 --pretty=format:%h)
-    test $revision && revision=git-$revision
-fi
-
-# no version number found
-test $revision || revision=UNKNOWN
+revision=0.5
 
 test -n "$3" && revision=$revision-$3
 

From 58af0caf04485bb43cba18183aaf8ed2e7e7003c Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Tue, 3 Mar 2009 23:04:47 +0000
Subject: [PATCH 004/315] Add some release notes for this 0.5 release branch

Originally committed as revision 17787 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 RELEASE

diff --git a/RELEASE b/RELEASE
new file mode 100644
index 0000000000..96a0b0c92e
--- /dev/null
+++ b/RELEASE
@@ -0,0 +1,38 @@
+Release Notes
+=============
+
+* 0.5 "Bike Shed" March 3, 2009
+
+General notes
+-------------
+
+It has been so long since the last release that this should be considered the
+first FFmpeg release of recent times. Because of the way things have unfolded to
+date, the notes for this version cannot be entirely conventional.
+
+See the Changelog file for a list of significant changes.
+
+Please note that our policy on bug reports has not changed. We still only accept
+bug reports against the HEAD of FFmpeg trunk repository. If you are experiencing
+any issues with any formally released version of FFmpeg, please try a current
+version of the development code to check if the issue still exists. If it does,
+make your report against the development code following the usual bug reporting
+guidelines.
+
+API notes
+---------
+
+In the next release, it is intended to remove a number of deprecated APIs. We
+decided to put out a release that includes said APIs for the benefit of third
+party software.
+
+As such, this release:
+- provides a sync point for said APIs
+- increases awareness of API changes
+- allows the next release to detail how to transition from the old to the new
+
+The deprecated APIs to be removed are:
+- imgconvert (to be replaced by libswscale)
+- vhook (to be replaced by libavfilter)
+
+If at all possible, do not use the deprecated APIs.

From df4763a782613211b2aef78314d08a441bbe75d2 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Wed, 4 Mar 2009 16:52:37 +0000
Subject: [PATCH 005/315] Correct grammar in one sentence and add a note about
 doc/APIchanges

Originally committed as revision 17805 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/RELEASE b/RELEASE
index 96a0b0c92e..6b02332ca6 100644
--- a/RELEASE
+++ b/RELEASE
@@ -13,7 +13,7 @@ date, the notes for this version cannot be entirely conventional.
 See the Changelog file for a list of significant changes.
 
 Please note that our policy on bug reports has not changed. We still only accept
-bug reports against the HEAD of FFmpeg trunk repository. If you are experiencing
+bug reports against HEAD of the FFmpeg trunk repository. If you are experiencing
 any issues with any formally released version of FFmpeg, please try a current
 version of the development code to check if the issue still exists. If it does,
 make your report against the development code following the usual bug reporting
@@ -35,4 +35,5 @@ The deprecated APIs to be removed are:
 - imgconvert (to be replaced by libswscale)
 - vhook (to be replaced by libavfilter)
 
-If at all possible, do not use the deprecated APIs.
+If at all possible, do not use the deprecated APIs. All notes on API changes
+should appear in doc/APIchanges.

From f8429ed58cefea1669bc127cf2b1905b4893e3f2 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 8 Mar 2009 22:13:48 +0000
Subject: [PATCH 006/315] Peg libswscale to the revision corresponding to the
 moment the branch was cut.

Originally committed as revision 17887 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5

From c3c2325adcef1acdafccaba566a854823a95beb0 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 31 Mar 2009 21:06:20 +0000
Subject: [PATCH 007/315] Revert hackish release version number hardcoding in
 version.sh.

Originally committed as revision 18287 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 version.sh | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/version.sh b/version.sh
index fce5d0da7d..adaa46aa91 100755
--- a/version.sh
+++ b/version.sh
@@ -1,6 +1,20 @@
 #!/bin/sh
 
-revision=0.5
+# check for SVN revision number
+revision=$(cat snapshot_version 2> /dev/null)
+test $revision || revision=$(cd "$1" && LC_ALL=C svn info 2> /dev/null | grep Revision | cut -d' ' -f2)
+test $revision || revision=$(cd "$1" && grep revision .svn/entries 2>/dev/null | cut -d '"' -f2)
+test $revision || revision=$(cd "$1" && sed -n -e '/^dir$/{n;p;q}' .svn/entries 2>/dev/null)
+test $revision && revision=SVN-r$revision
+
+# check for git short hash
+if ! test $revision; then
+    revision=$(cd "$1" && git log -1 --pretty=format:%h)
+    test $revision && revision=git-$revision
+fi
+
+# no version number found
+test $revision || revision=UNKNOWN
 
 test -n "$3" && revision=$revision-$3
 

From 8e8813a0a1fb035e8f25ac9b8ae1d7ba5d1d2be4 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 31 Mar 2009 21:11:04 +0000
Subject: [PATCH 008/315] Merge improved version number generation from trunk.

Originally committed as revision 18288 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 VERSION    |  1 +
 version.sh | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)
 create mode 100644 VERSION

diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000000..2eb3c4fe4e
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.5
diff --git a/version.sh b/version.sh
index adaa46aa91..9809d6d14e 100755
--- a/version.sh
+++ b/version.sh
@@ -13,12 +13,16 @@ if ! test $revision; then
     test $revision && revision=git-$revision
 fi
 
-# no version number found
+# no revision number found
 test $revision || revision=UNKNOWN
 
-test -n "$3" && revision=$revision-$3
+# releases extract the version number from the VERSION file
+version=$(cat VERSION 2> /dev/null)
+test $version || version=$revision
 
-NEW_REVISION="#define FFMPEG_VERSION \"$revision\""
+test -n "$3" && version=$version-$3
+
+NEW_REVISION="#define FFMPEG_VERSION \"$version\""
 OLD_REVISION=$(cat version.h 2> /dev/null)
 
 # Update version.h only on revision changes to avoid spurious rebuilds

From beb93f987cef2f3a629caac29d143b2a1b901c6f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Apr 2009 10:00:56 +0000
Subject: [PATCH 009/315] Add a copy of libswscale into the branch instead of
 using svn:external. This will allow merging some changes from trunk.

Originally committed as revision 18488 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libswscale/Makefile                   |   24 +
 libswscale/cs_test.c                  |  175 ++
 libswscale/internal_bfin.S            |  606 +++++
 libswscale/rgb2rgb.c                  |  442 ++++
 libswscale/rgb2rgb.h                  |  147 ++
 libswscale/rgb2rgb_template.c         | 2738 +++++++++++++++++++++
 libswscale/swscale-example.c          |  210 ++
 libswscale/swscale.c                  | 3198 +++++++++++++++++++++++++
 libswscale/swscale.h                  |  247 ++
 libswscale/swscale_altivec_template.c |  538 +++++
 libswscale/swscale_avoption.c         |   60 +
 libswscale/swscale_bfin.c             |   91 +
 libswscale/swscale_internal.h         |  324 +++
 libswscale/swscale_template.c         | 3041 +++++++++++++++++++++++
 libswscale/yuv2rgb.c                  |  684 ++++++
 libswscale/yuv2rgb_altivec.c          |  962 ++++++++
 libswscale/yuv2rgb_bfin.c             |  203 ++
 libswscale/yuv2rgb_mlib.c             |   85 +
 libswscale/yuv2rgb_template.c         |  453 ++++
 libswscale/yuv2rgb_vis.c              |  209 ++
 20 files changed, 14437 insertions(+)
 create mode 100644 libswscale/Makefile
 create mode 100644 libswscale/cs_test.c
 create mode 100644 libswscale/internal_bfin.S
 create mode 100644 libswscale/rgb2rgb.c
 create mode 100644 libswscale/rgb2rgb.h
 create mode 100644 libswscale/rgb2rgb_template.c
 create mode 100644 libswscale/swscale-example.c
 create mode 100644 libswscale/swscale.c
 create mode 100644 libswscale/swscale.h
 create mode 100644 libswscale/swscale_altivec_template.c
 create mode 100644 libswscale/swscale_avoption.c
 create mode 100644 libswscale/swscale_bfin.c
 create mode 100644 libswscale/swscale_internal.h
 create mode 100644 libswscale/swscale_template.c
 create mode 100644 libswscale/yuv2rgb.c
 create mode 100644 libswscale/yuv2rgb_altivec.c
 create mode 100644 libswscale/yuv2rgb_bfin.c
 create mode 100644 libswscale/yuv2rgb_mlib.c
 create mode 100644 libswscale/yuv2rgb_template.c
 create mode 100644 libswscale/yuv2rgb_vis.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
new file mode 100644
index 0000000000..6d500abc65
--- /dev/null
+++ b/libswscale/Makefile
@@ -0,0 +1,24 @@
+include $(SUBDIR)../config.mak
+
+NAME = swscale
+FFLIBS = avutil
+
+HEADERS = swscale.h
+
+OBJS = rgb2rgb.o swscale.o swscale_avoption.o yuv2rgb.o
+
+OBJS-$(ARCH_BFIN)          +=  internal_bfin.o swscale_bfin.o yuv2rgb_bfin.o
+OBJS-$(CONFIG_MLIB)        +=  yuv2rgb_mlib.o
+OBJS-$(HAVE_ALTIVEC)       +=  yuv2rgb_altivec.o
+OBJS-$(HAVE_VIS)           +=  yuv2rgb_vis.o
+
+TESTS = cs_test swscale-example
+
+CLEANFILES = cs_test swscale-example
+
+include $(SUBDIR)../subdir.mak
+
+$(SUBDIR)cs_test: $(SUBDIR)cs_test.o $(SUBDIR)$(LIBNAME)
+
+$(SUBDIR)swscale-example: $(SUBDIR)swscale-example.o $(SUBDIR)$(LIBNAME)
+$(SUBDIR)swscale-example: EXTRALIBS += -lm
diff --git a/libswscale/cs_test.c b/libswscale/cs_test.c
new file mode 100644
index 0000000000..2223ee3a31
--- /dev/null
+++ b/libswscale/cs_test.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <string.h>              /* for memset() */
+#include <unistd.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "swscale.h"
+#include "rgb2rgb.h"
+
+#define SIZE 1000
+#define srcByte 0x55
+#define dstByte 0xBB
+
+#define FUNC(s,d,n) {s,d,#n,n}
+
+static int cpu_caps;
+
+static char *args_parse(int argc, char *argv[]) /* fold the -m / -2 / -3 flags into the file-scope cpu_caps mask */
+{
+    int o;
+
+    while ((o = getopt(argc, argv, "m23")) != -1) {
+        switch (o) {
+            case 'm': /* -m: request the MMX code paths */
+                cpu_caps |= SWS_CPU_CAPS_MMX;
+                break;
+            case '2': /* -2: request the MMX2 code paths */
+                cpu_caps |= SWS_CPU_CAPS_MMX2;
+                break;
+            case '3': /* -3: request the 3DNow code paths */
+                cpu_caps |= SWS_CPU_CAPS_3DNOW;
+                break;
+            default: /* anything else is logged; parsing continues with the next option */
+                av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
+        }
+    }
+
+    return argv[optind]; /* argv entry following the parsed options; main() ignores this value */
+}
+
+int main(int argc, char **argv) /* run every rgb2rgb converter and look for writes outside the expected destination span */
+{
+    int i, funcNum;
+    uint8_t *srcBuffer= (uint8_t*)av_malloc(SIZE); /* NOTE(review): result is not checked for NULL and the buffer is never freed */
+    uint8_t *dstBuffer= (uint8_t*)av_malloc(SIZE); /* NOTE(review): same as srcBuffer */
+    int failedNum=0; /* converters that modified bytes they should not have */
+    int passedNum=0; /* converters that left every guard byte intact */
+
+    av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
+    args_parse(argc, argv);
+    av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
+    sws_rgb2rgb_init(cpu_caps);
+
+    for(funcNum=0; ; funcNum++){ /* no loop condition: terminated by the NULL sentinel check below */
+        struct func_info_s {
+            int src_bpp; /* source bytes per pixel */
+            int dst_bpp; /* destination bytes per pixel */
+            const char *name; /* stringified converter name, used in log messages */
+            void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
+        } func_info[] = { /* local table, re-initialized on every pass of the outer loop */
+            FUNC(2, 2, rgb15to16),
+            FUNC(2, 3, rgb15to24),
+            FUNC(2, 4, rgb15to32),
+            FUNC(2, 3, rgb16to24),
+            FUNC(2, 4, rgb16to32),
+            FUNC(3, 2, rgb24to15),
+            FUNC(3, 2, rgb24to16),
+            FUNC(3, 4, rgb24to32),
+            FUNC(4, 2, rgb32to15),
+            FUNC(4, 2, rgb32to16),
+            FUNC(4, 3, rgb32to24),
+            FUNC(2, 2, rgb16to15),
+            FUNC(2, 2, rgb15tobgr15),
+            FUNC(2, 2, rgb15tobgr16),
+            FUNC(2, 3, rgb15tobgr24),
+            FUNC(2, 4, rgb15tobgr32),
+            FUNC(2, 2, rgb16tobgr15),
+            FUNC(2, 2, rgb16tobgr16),
+            FUNC(2, 3, rgb16tobgr24),
+            FUNC(2, 4, rgb16tobgr32),
+            FUNC(3, 2, rgb24tobgr15),
+            FUNC(3, 2, rgb24tobgr16),
+            FUNC(3, 3, rgb24tobgr24),
+            FUNC(3, 4, rgb24tobgr32),
+            FUNC(4, 2, rgb32tobgr15),
+            FUNC(4, 2, rgb32tobgr16),
+            FUNC(4, 3, rgb32tobgr24),
+            FUNC(4, 4, rgb32tobgr32),
+            FUNC(0, 0, NULL) /* sentinel: func == NULL ends the outer loop */
+        };
+        int width;
+        int failed=0;
+        int srcBpp=0;
+        int dstBpp=0;
+
+        if (!func_info[funcNum].func) break; /* reached the sentinel entry */
+
+        av_log(NULL, AV_LOG_INFO,"."); /* one progress dot per converter */
+        memset(srcBuffer, srcByte, SIZE); /* source is filled with a known pattern so any write to it is detectable */
+
+        for(width=63; width>0; width--){ /* pixel counts 63 down to 1 */
+            int dstOffset;
+            for(dstOffset=128; dstOffset<196; dstOffset+=4){ /* destination start offsets 128..192 */
+                int srcOffset;
+                memset(dstBuffer, dstByte, SIZE); /* guard pattern is restored once per dstOffset, not per srcOffset */
+
+                for(srcOffset=128; srcOffset<196; srcOffset+=4){ /* source start offsets 128..192 */
+                    uint8_t *src= srcBuffer+srcOffset;
+                    uint8_t *dst= dstBuffer+dstOffset;
+                    const char *name=NULL;
+
+                    if(failed) break; //stop after the first failure so the log is not flooded
+
+                    srcBpp = func_info[funcNum].src_bpp;
+                    dstBpp = func_info[funcNum].dst_bpp;
+                    name   = func_info[funcNum].name;
+
+                    func_info[funcNum].func(src, dst, width*srcBpp); /* third argument is the source size in bytes */
+
+                    if(!srcBpp) break; /* never taken for the table above: every non-sentinel entry has src_bpp >= 2 */
+
+                    for(i=0; i<SIZE; i++){ /* the whole source buffer must be unchanged */
+                        if(srcBuffer[i]!=srcByte){
+                            av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
+                                   i, width, srcOffset, dstOffset, name);
+                            failed=1;
+                            break;
+                        }
+                    }
+                    for(i=0; i<dstOffset; i++){ /* guard bytes in front of the destination span */
+                        if(dstBuffer[i]!=dstByte){
+                            av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
+                                   i, width, srcOffset, dstOffset, name);
+                            failed=1;
+                            break;
+                        }
+                    }
+                    for(i=dstOffset + width*dstBpp; i<SIZE; i++){ /* guard bytes behind the expected width*dstBpp output bytes */
+                        if(dstBuffer[i]!=dstByte){
+                            av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
+                                   i, width, srcOffset, dstOffset, name);
+                            failed=1;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        if(failed) failedNum++;
+        else if(srcBpp) passedNum++;
+    }
+
+    av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum);
+    return failedNum; /* exit status is the number of failing converters */
+}
diff --git a/libswscale/internal_bfin.S b/libswscale/internal_bfin.S
new file mode 100644
index 0000000000..fb7bda7e12
--- /dev/null
+++ b/libswscale/internal_bfin.S
@@ -0,0 +1,606 @@
+/*
+ * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
+ *                    April 20, 2007
+ *
+ * Blackfin video color space converter operations
+ * convert I420 YV12 to RGB in various formats
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+/*
+YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
+and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts.
+
+
+The following calculation is used for the conversion:
+
+  r = clipz((y-oy)*cy  + crv*(v-128))
+  g = clipz((y-oy)*cy  + cgv*(v-128) + cgu*(u-128))
+  b = clipz((y-oy)*cy  + cbu*(u-128))
+
+y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
+
+
+New factorization to eliminate the truncation error which was
+occurring due to the byteop3p.
+
+
+1) Use byteop16m to subtract quad bytes; it operates on U8 data,
+   so the offsets need to be renormalized to 8 bits.
+
+2) Scale operands up by a factor of 4 not 8 because Blackfin
+   multiplies include a shift.
+
+3) Compute into the accumulators cy*yx0, cy*yx1.
+
+4) Compute each of the linear equations:
+     r = clipz((y - oy) * cy  + crv * (v - 128))
+
+     g = clipz((y - oy) * cy  + cgv * (v - 128) + cgu * (u - 128))
+
+     b = clipz((y - oy) * cy  + cbu * (u - 128))
+
+   Reuse of the accumulators requires that we actually multiply
+   twice once with addition and the second time with a subtraction.
+
+   Because of this we need to compute the equations in the order R B
+   then G saving the writes for B in the case of 24/32 bit color
+   formats.
+
+   API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
+                      int dW, uint32_t *coeffs);
+
+       A          B
+       ---        ---
+       i2 = cb    i3 = cr
+       i1 = coeff i0 = y
+
+Where coeffs have the following layout in memory.
+
+uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask;
+
+coeffs is a pointer to oy.
+
+The {rgb} masks are only utilized by the 565 packing algorithm. Note the data
+replication is used to simplify the internal algorithms for the dual Mac
+architecture of BlackFin.
+
+All routines are exported with _ff_bfin_ as a symbol prefix.
+
+Rough performance gain compared against -O3:
+
+2779809/1484290 187.28%
+
+which translates to ~33c/pel to ~57c/pel for the reference vs 17.5
+c/pel for the optimized implementations. Not sure why there is such a
+huge variation on the reference codes on Blackfin I guess it must have
+to do with the memory system.
+*/
+
+#define mL3 .text
+#ifdef __FDPIC__
+#define mL1 .l1.text
+#else
+#define mL1 mL3
+#endif
+#define MEM mL1
+
+#define DEFUN(fname,where,interface) \
+        .section where;              \
+        .global _ff_bfin_ ## fname;  \
+        .type _ff_bfin_ ## fname, STT_FUNC; \
+        .align 8;                    \
+        _ff_bfin_ ## fname
+
+#define DEFUN_END(fname) \
+        .size _ff_bfin_ ## fname, . - _ff_bfin_ ## fname
+
+
+.text
+
+#define COEFF_LEN        11*4
+#define COEFF_REL_CY_OFF 4*4
+
+#define ARG_OUT   20
+#define ARG_W     24
+#define ARG_COEFF 28
+
+DEFUN(yuv2rgb565_line,MEM,
+   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
+        link 0;
+        [--sp] = (r7:4);
+        p1 = [fp+ARG_OUT];     // p1 = out
+        r3 = [fp+ARG_W];       // r3 = dW
+
+        i0 = r0;               // i0 = Y
+        i2 = r1;               // i2 = U
+        i3 = r2;               // i3 = V
+
+        r0 = [fp+ARG_COEFF];
+        i1 = r0;
+        b1 = i1;               // b1 (base) and l1 (length) make i1 wrap around the coeff table
+        l1 = COEFF_LEN;
+        m0 = COEFF_REL_CY_OFF; // post-modify step used on the gmask load to wrap i1 back to cy
+        p0 = r3;
+
+        r0   = [i0++];         // 4Y
+        r1.l = w[i2++];        // 2u
+        r1.h = w[i3++];        // 2v
+        p0 = p0>>2;            // loop count = dW/4: each iteration stores two 32-bit words (4 pixels)
+
+        lsetup (.L0565, .L1565) lc0 = p0;
+
+        /*
+           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
+           r0 -- used to load 4ys
+           r1 -- used to load 2us,2vs
+           r4 -- y3,y2
+           r5 -- y1,y0
+           r6 -- u1,u0
+           r7 -- v1,v0
+        */
+                                                              r2=[i1++]; // oy
+.L0565:
+        /*
+        rrrrrrrr gggggggg bbbbbbbb
+         5432109876543210
+                    bbbbb >>3
+              gggggggg    <<3
+         rrrrrrrr         <<8
+         rrrrrggggggbbbbb
+        */
+        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
+        (r7,r6) = byteop16m (r1:0, r3:2) (r);
+        r5 = r5 << 2 (v);                                                // y1,y0
+        r4 = r4 << 2 (v);                                                // y3,y2
+        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
+        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
+        /* Y' = y*cy */
+        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2 = r2 >> 3 (v); // NOTE(review): the value labelled R ends up in the low bits, where the diagram above shows b -- confirm against the caller's coeff table
+        r3 = r2 & r5;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
+                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+        r2 = r2 << 8 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask
+        r2 = r2 << 3 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+        [p1++]=r3                                          || r1=[i1++]; // cy
+
+        /* Y' = y*cy */
+
+        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
+                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2 = r2 >> 3 (v);
+        r3 = r2 & r5;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
+                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+        r2 = r2 << 8 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0   =  [i0++];        // 4Y
+        r2 = r2 << 3 (v)                                   || r1.l = w[i2++];        // 2u
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+        [p1++]=r3                                          || r1.h = w[i3++];        // 2v
+.L1565:                                                       r2=[i1++]; // oy
+
+        l1 = 0;                // clear the i1 wrap length again before returning
+
+        (r7:4) = [sp++];
+        unlink;
+        rts;
+DEFUN_END(yuv2rgb565_line)
+
+DEFUN(yuv2rgb555_line,MEM,
+   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
+        link 0;
+        [--sp] = (r7:4);
+        p1 = [fp+ARG_OUT];     // p1 = out
+        r3 = [fp+ARG_W];       // r3 = dW
+
+        i0 = r0;               // i0 = Y
+        i2 = r1;               // i2 = U
+        i3 = r2;               // i3 = V
+
+        r0 = [fp+ARG_COEFF];
+        i1 = r0;
+        b1 = i1;               // b1 (base) and l1 (length) make i1 wrap around the coeff table
+        l1 = COEFF_LEN;
+        m0 = COEFF_REL_CY_OFF; // post-modify step used on the gmask load to wrap i1 back to cy
+        p0 = r3;
+
+        r0   = [i0++];         // 4Y
+        r1.l = w[i2++];        // 2u
+        r1.h = w[i3++];        // 2v
+        p0 = p0>>2;            // loop count = dW/4: each iteration stores two 32-bit words (4 pixels)
+
+        lsetup (.L0555, .L1555) lc0 = p0;
+
+        /*
+           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
+           r0 -- used to load 4ys
+           r1 -- used to load 2us,2vs
+           r4 -- y3,y2
+           r5 -- y1,y0
+           r6 -- u1,u0
+           r7 -- v1,v0
+        */
+                                                              r2=[i1++]; // oy
+.L0555:
+        /*
+        rrrrrrrr gggggggg bbbbbbbb
+         5432109876543210
+                    bbbbb >>3
+               gggggggg   <<2
+          rrrrrrrr        <<7
+         xrrrrrgggggbbbbb
+        */
+
+        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
+        (r7,r6) = byteop16m (r1:0, r3:2) (r);
+        r5 = r5 << 2 (v);                                                // y1,y0
+        r4 = r4 << 2 (v);                                                // y3,y2
+        r6 = r6 << 2 (v)                                   || r0=[i1++]; // u1,u0, r0=zero
+        r7 = r7 << 2 (v)                                   || r1=[i1++]; // v1,v0  r1=cy
+        /* Y' = y*cy */
+        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2 = r2 >> 3 (v); // NOTE(review): the value labelled R ends up in the low bits, where the diagram above shows b -- confirm against the caller's coeff table
+        r3 = r2 & r5;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
+                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+        r2 = r2 << 7 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask
+        r2 = r2 << 2 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+        [p1++]=r3                                          || r1=[i1++]; // cy
+
+        /* Y' = y*cy */
+
+        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
+                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2 = r2 >> 3 (v);
+        r3 = r2 & r5;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
+                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+        r2 = r2 << 7 (v);
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0=[i0++];     // 4Y
+        r2 = r2 << 2 (v)                                   || r1.l=w[i2++];  // 2u
+        r2 = r2 & r5;
+        r3 = r3 | r2;
+        [p1++]=r3                                          || r1.h=w[i3++]; // 2v
+
+.L1555:                                                       r2=[i1++]; // oy
+
+        l1 = 0;                // clear the i1 wrap length again before returning
+
+        (r7:4) = [sp++];
+        unlink;
+        rts;
+DEFUN_END(yuv2rgb555_line)
+
+DEFUN(yuv2rgb24_line,MEM,
+   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
+        link 0;
+        [--sp] = (r7:4);
+        p1 = [fp+ARG_OUT];     // p1 = out
+        r3 = [fp+ARG_W];       // r3 = dW
+        p2 = p1;
+        p2 += 3;               // p2 starts 3 bytes (one 24-bit pixel) after p1
+
+        i0 = r0;               // i0 = Y
+        i2 = r1;               // i2 = U
+        i3 = r2;               // i3 = V
+
+        r0 = [fp+ARG_COEFF]; // coeff buffer
+        i1 = r0;
+        b1 = i1;               // b1 (base) and l1 (length) make i1 wrap around the coeff table
+        l1 = COEFF_LEN;
+        m0 = COEFF_REL_CY_OFF; // post-modify step used on the gmask load to wrap i1 back to cy
+        p0 = r3;
+
+        r0   = [i0++];         // 4Y
+        r1.l = w[i2++];        // 2u
+        r1.h = w[i3++];        // 2v
+        p0 = p0>>2;            // loop count = dW/4
+
+        lsetup (.L0888, .L1888) lc0 = p0;
+
+        /*
+           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
+           r0 -- used to load 4ys
+           r1 -- used to load 2us,2vs
+           r4 -- y3,y2
+           r5 -- y1,y0
+           r6 -- u1,u0
+           r7 -- v1,v0
+        */
+                                                              r2=[i1++]; // oy
+.L0888:
+        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
+        (r7,r6) = byteop16m (r1:0, r3:2) (r);
+        r5 = r5 << 2 (v);               // y1,y0
+        r4 = r4 << 2 (v);               // y3,y2
+        r6 = r6 << 2 (v) || r0=[i1++];  // u1,u0, r0=zero
+        r7 = r7 << 2 (v) || r1=[i1++];  // v1,v0  r1=cy
+
+        /* Y' = y*cy */
+        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2=r2>>16 || B[p1++]=r2;
+                     B[p2++]=r2;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
+                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
+        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask, oy,cy,zero
+
+        r2=r2>>16 || B[p1++]=r2;
+                     B[p2++]=r2;
+
+        r3=r3>>16 || B[p1++]=r3;
+                     B[p2++]=r3                            || r1=[i1++]; // cy
+
+        p1+=3;                 // skip the 3 bytes just written through p2
+        p2+=3;
+        /* Y' = y*cy */
+        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv
+
+        /* R = Y+ crv*(Cr-128) */
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
+                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
+        r2=r2>>16 || B[p1++]=r2;
+        B[p2++]=r2;
+
+        /* B = Y+ cbu*(Cb-128) */
+        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
+                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
+        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
+
+        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
+                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
+        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
+        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++]; // gmask
+        r2=r2>>16 || B[p1++]=r2 || r0 = [i0++];    // 4y
+                     B[p2++]=r2 || r1.l = w[i2++]; // 2u
+        r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v
+                     B[p2++]=r3 || r2=[i1++];      // oy
+
+        p1+=3;
+.L1888: p2+=3;
+
+        l1 = 0;                // clear the i1 wrap length again before returning
+
+        (r7:4) = [sp++];
+        unlink;
+        rts;
+DEFUN_END(yuv2rgb24_line)
+
+
+
+#define ARG_vdst        20
+#define ARG_width       24
+#define ARG_height      28
+#define ARG_lumStride   32
+#define ARG_chromStride 36
+#define ARG_srcStride   40
+
+DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                         long width, long height,
+                         long lumStride, long chromStride, long srcStride)):
+        link 0;
+        [--sp] = (r7:4,p5:4);
+
+        p0 = r1;       // Y top even
+
+        i2 = r2; // *u
+        r2 = [fp + ARG_vdst];
+        i3 = r2; // *v
+
+        r1 = [fp + ARG_srcStride];
+        r2 = r0 + r1;  // src + srcStride: start of the second source row
+        r1 += -8;  // i0,i1 is pre read need to correct
+        m0 = r1;       // m0 = srcStride - 8
+
+        i0 = r0;  // uyvy_T even
+        i1 = r2;  // uyvy_B odd
+
+        p2 = [fp + ARG_lumStride];
+        p1 = p0 + p2;  // Y bot odd
+
+        p5 = [fp + ARG_width];
+        p4 = [fp + ARG_height];
+        r0 = p5;
+        p4 = p4 >> 1;  // outer loop count: height/2 (two rows per pass)
+        p5 = p5 >> 2;  // inner loop count: width/4 (four luma samples per row per pass)
+
+        r2 = [fp + ARG_chromStride];
+        r0 = r0 >> 1;
+        r2 = r2 - r0;
+        m1 = r2;       // m1 = chromStride - width/2
+
+        /*   I0,I1 - src input line pointers
+         *   p0,p1 - luma output line pointers
+         *   I2    - dstU
+         *   I3    - dstV
+         */
+
+        lsetup (0f, 1f) lc1 = p4;   // H/2
+0:        r0 = [i0++] || r2 = [i1++];
+          r1 = [i0++] || r3 = [i1++];
+          r4 = byteop1p(r1:0, r3:2);
+          r5 = byteop1p(r1:0, r3:2) (r);
+          lsetup (2f, 3f) lc0 = p5; // W/4
+2:          r0 = r0 >> 8(v);
+            r1 = r1 >> 8(v);
+            r2 = r2 >> 8(v);
+            r3 = r3 >> 8(v);
+            r0 = bytepack(r0, r1);
+            r2 = bytepack(r2, r3)         ||  [p0++] = r0;    // yyyy
+            r6 = pack(r5.l, r4.l)         ||  [p1++] = r2;    // yyyy
+            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
+            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
+            r4 = byteop1p(r1:0, r3:2)     ||  w[i2++] = r6.l; // uu
+3:          r5 = byteop1p(r1:0, r3:2) (r) ||  w[i3++] = r6.h; // vv
+
+          i0 += m0; // NOTE(review): i0/i1 have already advanced 2*width+8 bytes; this reaches the next row pair only if srcStride == 2*width -- confirm
+          i1 += m0;
+          i2 += m1;
+          i3 += m1;
+          p0 = p0 + p2; // NOTE(review): p0/p1 have already advanced width bytes; this reaches the next row pair only if lumStride == width -- confirm
+1:        p1 = p1 + p2;
+
+        (r7:4,p5:4) = [sp++];
+        unlink;
+        rts;
+DEFUN_END(uyvytoyv12)
+
+DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                         long width, long height,
+                         long lumStride, long chromStride, long srcStride)):
+        link 0;
+        [--sp] = (r7:4,p5:4);
+
+        p0 = r1;       // Y top even
+
+        i2 = r2; // *u
+        r2 = [fp + ARG_vdst];
+        i3 = r2; // *v
+
+        r1 = [fp + ARG_srcStride];
+        r2 = r0 + r1;  // src + srcStride: start of the second source row
+        r1 += -8;  // i0,i1 is pre read need to correct
+        m0 = r1;       // m0 = srcStride - 8
+
+        i0 = r0;  // yuyv_T even
+        i1 = r2;  // yuyv_B odd
+
+        p2 = [fp + ARG_lumStride];
+        p1 = p0 + p2;  // Y bot odd
+
+        p5 = [fp + ARG_width];
+        p4 = [fp + ARG_height];
+        r0 = p5;
+        p4 = p4 >> 1;  // outer loop count: height/2 (two rows per pass)
+        p5 = p5 >> 2;  // inner loop count: width/4 (four luma samples per row per pass)
+
+        r2 = [fp + ARG_chromStride];
+        r0 = r0 >> 1;
+        r2 = r2 - r0;
+        m1 = r2;       // m1 = chromStride - width/2
+
+        /*   I0,I1 - src input line pointers
+         *   p0,p1 - luma output line pointers
+         *   I2    - dstU
+         *   I3    - dstV
+         */
+
+        lsetup (0f, 1f) lc1 = p4;   // H/2
+0:        r0 = [i0++] || r2 = [i1++];
+          r1 = [i0++] || r3 = [i1++];
+          r4 = bytepack(r0, r1);
+          r5 = bytepack(r2, r3);
+          lsetup (2f, 3f) lc0 = p5; // W/4
+2:          r0 = r0 >> 8(v) || [p0++] = r4;  // yyyy-even
+            r1 = r1 >> 8(v) || [p1++] = r5;  // yyyy-odd
+            r2 = r2 >> 8(v);
+            r3 = r3 >> 8(v);
+            r4 = byteop1p(r1:0, r3:2);
+            r5 = byteop1p(r1:0, r3:2) (r);
+            r6 = pack(r5.l, r4.l);
+            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
+            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
+            r4 = bytepack(r0, r1)         ||  w[i2++] = r6.l; // uu
+3:          r5 = bytepack(r2, r3)         ||  w[i3++] = r6.h; // vv
+
+          i0 += m0; // NOTE(review): i0/i1 have already advanced 2*width+8 bytes; this reaches the next row pair only if srcStride == 2*width -- confirm
+          i1 += m0;
+          i2 += m1;
+          i3 += m1;
+          p0 = p0 + p2; // NOTE(review): p0/p1 have already advanced width bytes; this reaches the next row pair only if lumStride == width -- confirm
+1:        p1 = p1 + p2;
+
+        (r7:4,p5:4) = [sp++];
+        unlink;
+        rts;
+DEFUN_END(yuyvtoyv12)
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
new file mode 100644
index 0000000000..ad69265c37
--- /dev/null
+++ b/libswscale/rgb2rgb.c
@@ -0,0 +1,442 @@
+/*
+ * software RGB to RGB converter
+ * pluralize by software PAL8 to RGB converter
+ *              software YUV to YUV converter
+ *              software YUV to RGB converter
+ * Written by Nick Kurshev.
+ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * The C code (not assembly, MMX, ...) of this file can be used
+ * under the LGPL license.
+ */
+#include <inttypes.h>
+#include "config.h"
+#include "libavutil/x86_cpu.h"
+#include "libavutil/bswap.h"
+#include "rgb2rgb.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
+
+void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); // packed-pixel converters, held as function pointers with a common (src, dst, src_size) signature
+void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); // NOTE(review): presumably assigned by the rgb2rgb_init_*() variants - confirm in rgb2rgb_template.c
+void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+
+void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, // three source planes in, one dst buffer out
+                   long width, long height,
+                   long lumStride, long chromStride, long dstStride);
+void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, // same parameter list as yv12toyuy2
+                   long width, long height,
+                   long lumStride, long chromStride, long dstStride);
+void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, // same parameter list as yv12toyuy2
+                      long width, long height,
+                      long lumStride, long chromStride, long dstStride);
+void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, // same parameter list as yv12toyuy2
+                      long width, long height,
+                      long lumStride, long chromStride, long dstStride);
+void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, // one src buffer in, three destination planes out
+                   long width, long height,
+                   long lumStride, long chromStride, long srcStride);
+void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, // same parameter list as yuy2toyv12
+                    long width, long height,
+                    long lumStride, long chromStride, long srcStride);
+void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height, // single plane in, single plane out, separate strides
+                 long srcStride, long dstStride);
+void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst, // two sources, one destination; note the sources are not const here
+                        long width, long height, long src1Stride,
+                        long src2Stride, long dstStride);
+void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, // two sources in, two destinations out, one stride each
+                    uint8_t *dst1, uint8_t *dst2,
+                    long width, long height,
+                    long srcStride1, long srcStride2,
+                    long dstStride1, long dstStride2);
+void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, // three sources in, one destination out
+                     uint8_t *dst,
+                     long width, long height,
+                     long srcStride1, long srcStride2,
+                     long srcStride3, long dstStride);
+
+#if ARCH_X86 && CONFIG_GPL // 64-bit constants for the inline asm of the template (e.g. mask32a, mask24l..mask24hhhh, mask15s, mask15rg, mask15b)
+DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL; // NOTE(review): first macro argument is presumably the alignment in bytes - confirm in the macro definition
+DECLARE_ASM_CONST(8, uint64_t, mmx_one)      = 0xFFFFFFFFFFFFFFFFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask32b)      = 0x000000FF000000FFULL; // byte 0 of each 32-bit half
+DECLARE_ASM_CONST(8, uint64_t, mask32g)      = 0x0000FF000000FF00ULL; // byte 1 of each 32-bit half
+DECLARE_ASM_CONST(8, uint64_t, mask32r)      = 0x00FF000000FF0000ULL; // byte 2 of each 32-bit half
+DECLARE_ASM_CONST(8, uint64_t, mask32a)      = 0xFF000000FF000000ULL; // byte 3 of each 32-bit half
+DECLARE_ASM_CONST(8, uint64_t, mask32)       = 0x00FFFFFF00FFFFFFULL; // bytes 0..2 of each 32-bit half
+DECLARE_ASM_CONST(8, uint64_t, mask3216br)   = 0x00F800F800F800F8ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask3216g)    = 0x0000FC000000FC00ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask3215g)    = 0x0000F8000000F800ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul3216)      = 0x2000000420000004ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul3215)      = 0x2000000820000008ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24b)      = 0x00FF0000FF0000FFULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24g)      = 0xFF0000FF0000FF00ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24r)      = 0x0000FF0000FF0000ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask24l)      = 0x0000000000FFFFFFULL; // low 3 bytes
+DECLARE_ASM_CONST(8, uint64_t, mask24h)      = 0x0000FFFFFF000000ULL; // bytes 3..5
+DECLARE_ASM_CONST(8, uint64_t, mask24hh)     = 0xffff000000000000ULL; // top 2 bytes
+DECLARE_ASM_CONST(8, uint64_t, mask24hhh)    = 0xffffffff00000000ULL; // top 4 bytes
+DECLARE_ASM_CONST(8, uint64_t, mask24hhhh)   = 0xffffffffffff0000ULL; // top 6 bytes
+DECLARE_ASM_CONST(8, uint64_t, mask15b)      = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
+DECLARE_ASM_CONST(8, uint64_t, mask15rg)     = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
+DECLARE_ASM_CONST(8, uint64_t, mask15s)      = 0xFFE0FFE0FFE0FFE0ULL; // bits 5..15 of each 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, mask15g)      = 0x03E003E003E003E0ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask15r)      = 0x7C007C007C007C00ULL;
+#define mask16b mask15b
+DECLARE_ASM_CONST(8, uint64_t, mask16g)      = 0x07E007E007E007E0ULL;
+DECLARE_ASM_CONST(8, uint64_t, mask16r)      = 0xF800F800F800F800ULL;
+DECLARE_ASM_CONST(8, uint64_t, red_16mask)   = 0x0000f8000000f800ULL;
+DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL;
+DECLARE_ASM_CONST(8, uint64_t, blue_16mask)  = 0x0000001f0000001fULL;
+DECLARE_ASM_CONST(8, uint64_t, red_15mask)   = 0x00007c0000007c00ULL;
+DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
+DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
+#endif /* ARCH_X86 && CONFIG_GPL */
+
+#define RGB2YUV_SHIFT 8 // the coefficients below are fixed-point values scaled by 1<<RGB2YUV_SHIFT
+#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) // naming: first letter = R/G/B input, second letter = Y/U/V output
+#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) // note: adding 0.5 before the (int) cast moves negative coefficients toward zero
+#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
+#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
+#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
+#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
+#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
+#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
+#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
+
+//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
+//plain C versions
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE2
+#define HAVE_MMX 0
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _C // RENAME appends the variant suffix, so every inclusion of the template yields differently named functions
+#include "rgb2rgb_template.c"
+
+#if ARCH_X86 && CONFIG_GPL
+
+//MMX versions
+#undef RENAME
+#undef HAVE_MMX
+#define HAVE_MMX 1
+#define RENAME(a) a ## _MMX
+#include "rgb2rgb_template.c"
+
+//MMX2 versions
+#undef RENAME
+#undef HAVE_MMX2
+#define HAVE_MMX2 1 // HAVE_MMX is still 1 from the MMX section above
+#define RENAME(a) a ## _MMX2
+#include "rgb2rgb_template.c"
+
+//3DNOW versions
+#undef RENAME
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX2 0 // MMX2 is switched off again: HAVE_MMX=1, HAVE_MMX2=0, HAVE_AMD3DNOW=1 for this inclusion
+#define HAVE_AMD3DNOW 1
+#define RENAME(a) a ## _3DNOW
+#include "rgb2rgb_template.c"
+
+#endif /* ARCH_X86 && CONFIG_GPL */
+
+/*
+ RGB15->RGB16 original by Strepto/Astral
+ ported to gcc & bugfixed : A'rpi
+ MMX2, 3DNOW optimization by Nick Kurshev
+ 32-bit C version, and and&add trick by Michael Niedermayer
+*/
+
+void sws_rgb2rgb_init(int flags){ // calls exactly one rgb2rgb_init_*() variant, chosen from the SWS_CPU_CAPS_* bits in flags
+#if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX)  && CONFIG_GPL // HAVE_* hold whatever the last template inclusion above left defined
+    if (flags & SWS_CPU_CAPS_MMX2) // checked first, so MMX2 wins when several capability bits are set
+        rgb2rgb_init_MMX2();
+    else if (flags & SWS_CPU_CAPS_3DNOW)
+        rgb2rgb_init_3DNOW();
+    else if (flags & SWS_CPU_CAPS_MMX)
+        rgb2rgb_init_MMX();
+    else // no capability bit matched: the statement after the #endif is the else branch
+#endif /* (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX) && CONFIG_GPL */
+        rgb2rgb_init_C(); // also the only call compiled when the #if above is false
+}
+
+/**
+ * Expand 8-bit palette indices to the packed 32-bit entries stored in the palette.
+ */
+void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{
+    const uint32_t *lut = (const uint32_t *) palette; // palette viewed as 32-bit entries
+    uint32_t *out = (uint32_t *) dst;
+    long n;
+    for (n = 0; n < num_pixels; n++) out[n] = lut[src[n]];
+}
+
+/**
+ * Expand 8-bit palette indices to 3 bytes per pixel.
+ * Palette format: ABCD -> dst format: ABC
+ */
+void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{
+    long n;
+    for (n = 0; n < num_pixels; n++, dst += 3)
+    {
+        //FIXME slow?
+        const uint8_t *entry = palette + src[n]*4; // each palette entry is 4 bytes; the last one is dropped
+        dst[0] = entry[0];
+        dst[1] = entry[1];
+        dst[2] = entry[2];
+    }
+}
+
+/**
+ * Index lookup only: the palette is assumed to contain BGR16 already (see rgb32to16).
+ */
+void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{
+    const uint16_t *lut = (const uint16_t *)palette; // palette viewed as 16-bit entries
+    long n;
+    for (n = 0; n < num_pixels; n++) ((uint16_t *)dst)[n] = lut[src[n]];
+}
+void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{   // same lookup as palette8torgb16, but every 16-bit entry goes through bswap_16() before it is stored
+    const uint16_t *lut = (const uint16_t *)palette;
+    long n;
+    for (n = 0; n < num_pixels; n++) ((uint16_t *)dst)[n] = bswap_16(lut[src[n]]);
+}
+
+/**
+ * Index lookup only: the palette is assumed to contain BGR15 already (see rgb32to15).
+ */
+void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{
+    const uint16_t *lut = (const uint16_t *)palette; // palette viewed as 16-bit entries
+    long n;
+    for (n = 0; n < num_pixels; n++) ((uint16_t *)dst)[n] = lut[src[n]];
+}
+void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+{   // same lookup as palette8torgb15, but every 16-bit entry goes through bswap_16() before it is stored
+    const uint16_t *lut = (const uint16_t *)palette;
+    long n;
+    for (n = 0; n < num_pixels; n++) ((uint16_t *)dst)[n] = bswap_16(lut[src[n]]);
+}
+
+void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* 4-byte pixels in, 3-byte pixels out; which source bytes are kept depends on WORDS_BIGENDIAN */
+    const uint8_t *in = src;
+    uint8_t *out = dst;
+    for (src_size >>= 2; src_size > 0; src_size--, in += 4, out += 3)
+    {
+        #ifdef WORDS_BIGENDIAN
+            /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
+            out[0] = in[1];
+            out[1] = in[2];
+            out[2] = in[3];
+        #else
+            out[0] = in[2];
+            out[1] = in[1];
+            out[2] = in[0];
+        #endif
+    }
+}
+
+void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* Expands every whole 3-byte pixel within src_size bytes to 4 bytes, the added byte being 255. */
+    long i;   /* A trailing partial pixel is skipped, so nothing past src_size is read. */
+    for (i=0; 3*i+2<src_size; i++)
+    {
+        #ifdef WORDS_BIGENDIAN
+            /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
+            dst[4*i + 0] = 255;
+            dst[4*i + 1] = src[3*i + 0];
+            dst[4*i + 2] = src[3*i + 1];
+            dst[4*i + 3] = src[3*i + 2];
+        #else
+            dst[4*i + 0] = src[3*i + 2];
+            dst[4*i + 1] = src[3*i + 1];
+            dst[4*i + 2] = src[3*i + 0];
+            dst[4*i + 3] = 255;
+        #endif
+    }
+}
+
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    /* Unpack 5-6-5 16-bit pixels into 4 bytes each; low bits are zero-filled, the extra byte is 255. */
+    const uint16_t *in = (const uint16_t *)src;
+    const uint16_t *const stop = in + src_size/2;
+    uint8_t *out = dst;
+    for (; in < stop; in++, out += 4)
+    {
+        const unsigned px = *in;
+        const unsigned hi = (px&0xF800)>>8, mid = (px&0x7E0)>>3, lo = (px&0x1F)<<3;
+        #ifdef WORDS_BIGENDIAN
+            out[0] = 255;
+            out[1] = lo;
+            out[2] = mid;
+            out[3] = hi;
+        #else
+            out[0] = hi;
+            out[1] = mid;
+            out[2] = lo;
+            out[3] = 255;
+        #endif
+    }
+}
+
+void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    /* Unpack 5-6-5 pixels to 3 bytes each: top 5 bits first, then the middle 6, then the low 5. */
+    const uint16_t *in = (const uint16_t *)src;
+    const uint16_t *const stop = in + src_size/2;
+    uint8_t *out = dst;
+
+    for (; in < stop; in++, out += 3)
+    {
+        const unsigned px = *in;
+        out[0] = (px&0xF800)>>8;
+        out[1] = (px&0x7E0)>>3;
+        out[2] = (px&0x1F)<<3;
+    }
+}
+
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* Exchange the top and bottom 5-bit fields of every 16-bit pixel; the middle 6 bits stay put. */
+    const uint16_t *in = (const uint16_t*)src;
+    uint16_t *out = (uint16_t*)dst;
+    long n, count = src_size >> 1;
+    for (n = 0; n < count; n++)
+    {
+        const unsigned px = in[n];
+        out[n] = (px<<11) | (px&0x7E0) | (px>>11); // bits shifted above bit 15 are lost on the 16-bit store
+    }
+}
+
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* 5-6-5 in, 5-5-5 out with the outer fields exchanged; the lowest bit of the 6-bit field is dropped. */
+    const uint16_t *in = (const uint16_t*)src;
+    uint16_t *out = (uint16_t*)dst;
+    long n, count = src_size >> 1;
+    for (n = 0; n < count; n++)
+    {
+        const unsigned px = in[n];
+        out[n] = ((px&0x1F)<<10) | ((px&0x7C0)>>1) | (px>>11);
+    }
+}
+
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    /* Unpack 5-5-5 16-bit pixels into 4 bytes each; low bits are zero-filled, the extra byte is 255. */
+    const uint16_t *in = (const uint16_t *)src;
+    const uint16_t *const stop = in + src_size/2;
+    uint8_t *out = dst;
+    for (; in < stop; in++, out += 4)
+    {
+        const unsigned px = *in;
+        const unsigned hi = (px&0x7C00)>>7, mid = (px&0x3E0)>>2, lo = (px&0x1F)<<3;
+        #ifdef WORDS_BIGENDIAN
+            out[0] = 255;
+            out[1] = lo;
+            out[2] = mid;
+            out[3] = hi;
+        #else
+            out[0] = hi;
+            out[1] = mid;
+            out[2] = lo;
+            out[3] = 255;
+        #endif
+    }
+}
+
+void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    /* Unpack 5-5-5 pixels to 3 bytes each: bits 10..14 first, then bits 5..9, then bits 0..4. */
+    const uint16_t *in = (const uint16_t *)src;
+    const uint16_t *const stop = in + src_size/2;
+    uint8_t *out = dst;
+
+    for (; in < stop; in++, out += 3)
+    {
+        const unsigned px = *in;
+        out[0] = (px&0x7C00)>>7;
+        out[1] = (px&0x3E0)>>2;
+        out[2] = (px&0x1F)<<3;
+    }
+}
+
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* 5-5-5 in, 5-6-5 out with the outer fields exchanged; the middle field moves up one bit. */
+    const uint16_t *in = (const uint16_t*)src;
+    uint16_t *out = (uint16_t*)dst;
+    long n, count = src_size >> 1;
+    for (n = 0; n < count; n++)
+    {
+        const unsigned px = in[n];
+        out[n] = (px<<11) | ((px&0x3E0)<<1) | ((px&0x7C00)>>10); // bits above bit 15 are lost on the store
+    }
+}
+
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* Exchange bits 0..4 with bits 10..14 of every 16-bit pixel; bit 15 comes out as 0. */
+    const uint16_t *in = (const uint16_t*)src;
+    uint16_t *out = (uint16_t*)dst;
+    long n, count = src_size >> 1;
+
+    for (n = 0; n < count; n++)
+    {
+        const unsigned px = in[n];
+        const unsigned outer = px&0x7c1F; // both 5-bit outer fields, moved with one shift each way
+        out[n] = (outer<<10) | (px&0x3E0) | (outer>>10);
+    }
+}
+
+void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    /* Reverse the field order of each byte: the low 3-bit field lands in the top 2 bits (its top bit */
+    /* is dropped), the top 2-bit field lands in bits 1..2 (bit 0 is 0), the middle 3 bits stay.      */
+    long n;
+
+    for (n = 0; n < src_size; n++)
+    {
+        const unsigned px = src[n];
+        const unsigned r = px&0x07;
+        const unsigned g = (px&0x38)>>3;
+        const unsigned b = (px&0xC0)>>6;
+        dst[n] = ((r&0x03)<<6) | ((g&0x07)<<3) | ((b<<1)&0x07);
+    }
+}
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
new file mode 100644
index 0000000000..df912c8533
--- /dev/null
+++ b/libswscale/rgb2rgb.h
@@ -0,0 +1,147 @@
+/*
+ *  software RGB to RGB converter
+ *  pluralize by Software PAL8 to RGB converter
+ *               Software YUV to YUV converter
+ *               Software YUV to RGB converter
+ *  Written by Nick Kurshev.
+ *  palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWSCALE_RGB2RGB_H
+#define SWSCALE_RGB2RGB_H
+
+#include <inttypes.h>
+
+/* A full collection of RGB to RGB(BGR) converters */
+extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+
+void rgb24to32   (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb32to24   (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16to24   (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15to24   (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
+
+
+void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+
+/**
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line, others are ignored.
+ * FIXME: Write high quality version.
+ */
+//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+
+/**
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ */
+extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                          long width, long height,
+                          long lumStride, long chromStride, long dstStride);
+
+/**
+ * Width should be a multiple of 16.
+ */
+extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                             long width, long height,
+                             long lumStride, long chromStride, long dstStride);
+
+/**
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ */
+extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                          long width, long height,
+                          long lumStride, long chromStride, long srcStride);
+
+/**
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ */
+extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                          long width, long height,
+                          long lumStride, long chromStride, long dstStride);
+
+/**
+ * Width should be a multiple of 16.
+ */
+extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                             long width, long height,
+                             long lumStride, long chromStride, long dstStride);
+
+/**
+ * Height should be a multiple of 2 and width should be a multiple of 2.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line, others are ignored.
+ * FIXME: Write high quality version.
+ */
+extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                           long width, long height,
+                           long lumStride, long chromStride, long srcStride);
+extern void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
+                        long srcStride, long dstStride);
+
+extern void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst,
+                               long width, long height, long src1Stride,
+                               long src2Stride, long dstStride);
+
+extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
+                           uint8_t *dst1, uint8_t *dst2,
+                           long width, long height,
+                           long srcStride1, long srcStride2,
+                           long dstStride1, long dstStride2);
+
+extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+                            uint8_t *dst,
+                            long width, long height,
+                            long srcStride1, long srcStride2,
+                            long srcStride3, long dstStride);
+
+void sws_rgb2rgb_init(int flags);
+
+#endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
new file mode 100644
index 0000000000..e95b628049
--- /dev/null
+++ b/libswscale/rgb2rgb_template.c
@@ -0,0 +1,2738 @@
+/*
+ * software RGB to RGB converter
+ * pluralize by software PAL8 to RGB converter
+ *              software YUV to YUV converter
+ *              software YUV to RGB converter
+ * Written by Nick Kurshev.
+ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ * lot of big-endian byte order fixes by Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * The C code (not assembly, MMX, ...) of this file can be used
+ * under the LGPL license.
+ */
+
+#include <stddef.h>
+
+#undef PREFETCH // this template is included more than once by rgb2rgb.c, so drop the previous inclusion's definitions first
+#undef MOVNTQ
+#undef EMMS
+#undef SFENCE
+#undef MMREG_SIZE
+#undef PREFETCHW
+#undef PAVGB
+
+#if HAVE_SSE2
+#define MMREG_SIZE 16
+#else
+#define MMREG_SIZE 8
+#endif
+
+#if HAVE_AMD3DNOW
+#define PREFETCH  "prefetch"
+#define PREFETCHW "prefetchw"
+#define PAVGB     "pavgusb"
+#elif HAVE_MMX2
+#define PREFETCH "prefetchnta"
+#define PREFETCHW "prefetcht0"
+#define PAVGB     "pavgb"
+#else
+#define PREFETCH  " # nop" // expands to an assembler comment, i.e. no instruction is emitted
+#define PREFETCHW " # nop" // PAVGB is left undefined in this branch
+#endif
+
+#if HAVE_AMD3DNOW
+/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
+#define EMMS     "femms"
+#else
+#define EMMS     "emms"
+#endif
+
+#if HAVE_MMX2
+#define MOVNTQ "movntq"
+#define SFENCE "sfence"
+#else
+#define MOVNTQ "movq" // without MMX2 the stores are plain movq and SFENCE expands to an assembler comment
+#define SFENCE " # nop"
+#endif
+
+static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+{   // Expands 3-byte pixels to 4 bytes each; the added byte is 0xFF. src_size is the input size in bytes.
+    uint8_t *dest = dst;
+    const uint8_t *s = src;
+    const uint8_t *end;
+    #if HAVE_MMX
+        const uint8_t *mm_end;
+    #endif
+    end = s + src_size;
+    #if HAVE_MMX
+        __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+        mm_end = end - 23; // the MMX loop only runs while a whole 24-byte group (8 pixels) is left
+        __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory"); // mm7 = 0xFF in the top byte of each 32-bit half
+        while (s < mm_end)
+        {
+            __asm__ volatile(
+            PREFETCH"    32%1           \n\t"
+            "movd          %1, %%mm0    \n\t" // 4-byte loads at 3-byte steps: two pixels per register
+            "punpckldq    3%1, %%mm0    \n\t"
+            "movd         6%1, %%mm1    \n\t"
+            "punpckldq    9%1, %%mm1    \n\t"
+            "movd        12%1, %%mm2    \n\t"
+            "punpckldq   15%1, %%mm2    \n\t"
+            "movd        18%1, %%mm3    \n\t"
+            "punpckldq   21%1, %%mm3    \n\t" // NOTE(review): a 4-byte load at offset 21 touches s[24], one byte past the group - confirm the over-read is acceptable
+            "por        %%mm7, %%mm0    \n\t" // force the 4th byte of every pixel to 0xFF
+            "por        %%mm7, %%mm1    \n\t"
+            "por        %%mm7, %%mm2    \n\t"
+            "por        %%mm7, %%mm3    \n\t"
+            MOVNTQ"     %%mm0,   %0     \n\t"
+            MOVNTQ"     %%mm1,  8%0     \n\t"
+            MOVNTQ"     %%mm2, 16%0     \n\t"
+            MOVNTQ"     %%mm3, 24%0"
+            :"=m"(*dest)
+            :"m"(*s)
+            :"memory");
+            dest += 32; // 8 pixels written
+            s += 24;    // 8 pixels consumed
+        }
+        __asm__ volatile(SFENCE:::"memory");
+        __asm__ volatile(EMMS:::"memory");
+    #endif
+    while (s < end) // scalar path: everything when HAVE_MMX is 0, otherwise the remainder after the MMX loop
+    {
+    #ifdef WORDS_BIGENDIAN
+        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
+        *dest++ = 255;
+        *dest++ = s[2];
+        *dest++ = s[1];
+        *dest++ = s[0];
+        s+=3;
+    #else
+        *dest++ = *s++; // the three source bytes are copied in order, then 255 is appended
+        *dest++ = *s++;
+        *dest++ = *s++;
+        *dest++ = 255;
+    #endif
+    }
+}
+
+static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{   // Packs 4-byte pixels into 3 bytes each by dropping one byte per pixel. src_size is the input size in bytes.
+    uint8_t *dest = dst;
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 31; // the MMX loop only runs while a whole 32-byte group (8 pixels) is left
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movq          %1, %%mm0    \n\t" // load 8 pixels, two per register
+        "movq         8%1, %%mm1    \n\t"
+        "movq        16%1, %%mm4    \n\t"
+        "movq        24%1, %%mm5    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm1, %%mm3    \n\t"
+        "movq       %%mm4, %%mm6    \n\t"
+        "movq       %%mm5, %%mm7    \n\t"
+        "psrlq         $8, %%mm2    \n\t" // copies shifted down one byte so the second pixel closes up to the first
+        "psrlq         $8, %%mm3    \n\t"
+        "psrlq         $8, %%mm6    \n\t"
+        "psrlq         $8, %%mm7    \n\t"
+        "pand          %2, %%mm0    \n\t" // %2 = mask24l: keep the low 3 bytes (first pixel)
+        "pand          %2, %%mm1    \n\t"
+        "pand          %2, %%mm4    \n\t"
+        "pand          %2, %%mm5    \n\t"
+        "pand          %3, %%mm2    \n\t" // %3 = mask24h: keep bytes 3..5 (second pixel after the shift)
+        "pand          %3, %%mm3    \n\t"
+        "pand          %3, %%mm6    \n\t"
+        "pand          %3, %%mm7    \n\t"
+        "por        %%mm2, %%mm0    \n\t" // each register now holds its two pixels in the low 6 bytes
+        "por        %%mm3, %%mm1    \n\t"
+        "por        %%mm6, %%mm4    \n\t"
+        "por        %%mm7, %%mm5    \n\t"
+
+        "movq       %%mm1, %%mm2    \n\t" // below: splice the four 6-byte groups into three full 8-byte words
+        "movq       %%mm4, %%mm3    \n\t"
+        "psllq        $48, %%mm2    \n\t"
+        "psllq        $32, %%mm3    \n\t"
+        "pand          %4, %%mm2    \n\t"
+        "pand          %5, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "psrlq        $16, %%mm1    \n\t"
+        "psrlq        $32, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm3, %%mm1    \n\t"
+        "pand          %6, %%mm5    \n\t"
+        "por        %%mm5, %%mm4    \n\t"
+
+        MOVNTQ"     %%mm0,   %0     \n\t"
+        MOVNTQ"     %%mm1,  8%0     \n\t"
+        MOVNTQ"     %%mm4, 16%0"
+        :"=m"(*dest)
+        :"m"(*s),"m"(mask24l),
+         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        :"memory");
+        dest += 24; // 8 pixels written
+        s += 32;    // 8 pixels consumed
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end) // scalar path: everything when HAVE_MMX is 0, otherwise the remainder after the MMX loop
+    {
+#ifdef WORDS_BIGENDIAN
+        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
+        s++;
+        dest[2] = *s++;
+        dest[1] = *s++;
+        dest[0] = *s++;
+        dest += 3;
+#else
+        *dest++ = *s++; // copy three bytes in order, then skip the fourth source byte
+        *dest++ = *s++;
+        *dest++ = *s++;
+        s++;
+#endif
+    }
+}
+
+/*
+ original by Strepto/Astral
+ ported to gcc & bugfixed: A'rpi
+ MMX2, 3DNOW optimization by Nick Kurshev
+ 32-bit C version, and and&add trick by Michael Niedermayer
+*/
+static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
+{   // In every 16-bit pixel, bits 5..14 move up by one position while bits 0..4 stay; src_size is in bytes.
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s)); // mm4 = 0xFFE0 in each 16-bit word
+    mm_end = end - 15; // the MMX loop only runs while a whole 16-byte group (8 pixels) is left
+    while (s<mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"  32%1         \n\t"
+        "movq        %1, %%mm0  \n\t"
+        "movq       8%1, %%mm2  \n\t"
+        "movq     %%mm0, %%mm1  \n\t"
+        "movq     %%mm2, %%mm3  \n\t"
+        "pand     %%mm4, %%mm0  \n\t" // keep bits 5..15 ...
+        "pand     %%mm4, %%mm2  \n\t"
+        "paddw    %%mm1, %%mm0  \n\t" // ... and add them to the original: x + (x & 0xFFE0) doubles the upper part
+        "paddw    %%mm3, %%mm2  \n\t"
+        MOVNTQ"   %%mm0,  %0    \n\t"
+        MOVNTQ"   %%mm2, 8%0"
+        :"=m"(*d)
+        :"m"(*s)
+        );
+        d+=16;
+        s+=16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    mm_end = end - 3; // scalar loop: two pixels per 32-bit word
+    while (s < mm_end)
+    {
+        register unsigned x= *((const uint32_t *)s);
+        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); // same add trick; bit 15 of each pixel is masked off first
+        d+=4;
+        s+=4;
+    }
+    if (s < end) // at most one 16-bit pixel is left here
+    {
+        register unsigned short x= *((const uint16_t *)s);
+        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
+    }
+}
+
+static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
+{   // In every 16-bit pixel, bits 6..15 move down by one position while bits 0..4 stay; src_size is in bytes.
+    register const uint8_t* s=src;
+    register uint8_t* d=dst;
+    register const uint8_t *end;
+    const uint8_t *mm_end;
+    end = s + src_size;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
+    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg)); // mm7 = 0x7FE0 in each 16-bit word
+    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));  // mm6 = 0x001F in each 16-bit word
+    mm_end = end - 15; // the MMX loop only runs while a whole 16-byte group (8 pixels) is left
+    while (s<mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"  32%1         \n\t"
+        "movq        %1, %%mm0  \n\t"
+        "movq       8%1, %%mm2  \n\t"
+        "movq     %%mm0, %%mm1  \n\t"
+        "movq     %%mm2, %%mm3  \n\t"
+        "psrlq       $1, %%mm0  \n\t" // 64-bit shift; bits crossing a word boundary are removed by the mask below
+        "psrlq       $1, %%mm2  \n\t"
+        "pand     %%mm7, %%mm0  \n\t"
+        "pand     %%mm7, %%mm2  \n\t"
+        "pand     %%mm6, %%mm1  \n\t" // unshifted copy keeps only bits 0..4
+        "pand     %%mm6, %%mm3  \n\t"
+        "por      %%mm1, %%mm0  \n\t"
+        "por      %%mm3, %%mm2  \n\t"
+        MOVNTQ"   %%mm0,  %0    \n\t"
+        MOVNTQ"   %%mm2, 8%0"
+        :"=m"(*d)
+        :"m"(*s)
+        );
+        d+=16;
+        s+=16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    mm_end = end - 3; // scalar loop: two pixels per 32-bit word
+    while (s < mm_end)
+    {
+        register uint32_t x= *((const uint32_t*)s);
+        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
+        s+=4;
+        d+=4;
+    }
+    if (s < end) // at most one 16-bit pixel is left here
+    {
+        register uint16_t x= *((const uint16_t*)s);
+        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
+        s+=2;
+        d+=2;
+    }
+}
+
+static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 32-bit source word into one 16-bit destination word; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    mm_end = end - 15; /* an MMX iteration reads 16 source bytes (four pixels) */
+#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
+    __asm__ volatile(
+    "movq           %3, %%mm5   \n\t" /* mask3216g */
+    "movq           %4, %%mm6   \n\t" /* mask3216br */
+    "movq           %5, %%mm7   \n\t" /* mul3216 */
+    "jmp 2f                     \n\t" /* enter at the loop test so that zero iterations are possible */
+    ASMALIGN(4)
+    "1:                         \n\t"
+    PREFETCH"   32(%1)          \n\t"
+    "movd         (%1), %%mm0   \n\t" /* pixel 0 */
+    "movd        4(%1), %%mm3   \n\t" /* pixel 1 */
+    "punpckldq   8(%1), %%mm0   \n\t" /* pixel 2 into the high dword of mm0 */
+    "punpckldq  12(%1), %%mm3   \n\t" /* pixel 3 into the high dword of mm3 */
+    "movq        %%mm0, %%mm1   \n\t"
+    "movq        %%mm3, %%mm4   \n\t"
+    "pand        %%mm6, %%mm0   \n\t"
+    "pand        %%mm6, %%mm3   \n\t"
+    "pmaddwd     %%mm7, %%mm0   \n\t" /* multiply the masked 16-bit words by mul3216 and add each pair into a dword */
+    "pmaddwd     %%mm7, %%mm3   \n\t"
+    "pand        %%mm5, %%mm1   \n\t"
+    "pand        %%mm5, %%mm4   \n\t"
+    "por         %%mm1, %%mm0   \n\t"
+    "por         %%mm4, %%mm3   \n\t"
+    "psrld          $5, %%mm0   \n\t" /* presumably aligns pixels 0 and 2 to the low word of each dword - depends on constants not visible here */
+    "pslld         $11, %%mm3   \n\t" /* presumably aligns pixels 1 and 3 to the high word of each dword */
+    "por         %%mm3, %%mm0   \n\t"
+    MOVNTQ"      %%mm0, (%0)    \n\t"
+    "add           $16,  %1     \n\t" /* 16 source bytes consumed */
+    "add            $8,  %0     \n\t" /* 8 destination bytes written */
+    "2:                         \n\t"
+    "cmp            %2,  %1     \n\t"
+    " jb            1b          \n\t" /* loop while s is below mm_end (unsigned compare) */
+    : "+r" (d), "+r"(s)
+    : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
+    );
+#else /* shift-and-mask variant, compiled out by the "#if 1" above */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq    %0, %%mm7    \n\t"
+        "movq    %1, %%mm6    \n\t"
+        ::"m"(red_16mask),"m"(green_16mask));
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t"
+        "movd         4%1, %%mm3    \n\t"
+        "punpckldq    8%1, %%mm0    \n\t"
+        "punpckldq   12%1, %%mm3    \n\t"
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psrlq         $3, %%mm0    \n\t"
+        "psrlq         $3, %%mm3    \n\t"
+        "pand          %2, %%mm0    \n\t"
+        "pand          %2, %%mm3    \n\t"
+        "psrlq         $5, %%mm1    \n\t"
+        "psrlq         $5, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq         $8, %%mm2    \n\t"
+        "psrlq         $8, %%mm5    \n\t"
+        "pand       %%mm7, %%mm2    \n\t"
+        "pand       %%mm7, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t"
+        "por        %%mm3, %%mm0    \n\t"
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4;
+        s += 16;
+    }
+#endif /* #if 1 */
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 4 bytes whenever s < end, so a src_size that is not a multiple of 4 reads past end */
+    {
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); /* bits 3..7 -> 0..4, bits 10..15 -> 5..10, bits 19..23 -> 11..15; bits 24..31 are ignored */
+    }
+}
+
+static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 32-bit source word into one 16-bit destination word with the first and third source bytes swapped relative to rgb32to16; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t" /* red_16mask */
+        "movq          %1, %%mm6    \n\t" /* green_16mask */
+        ::"m"(red_16mask),"m"(green_16mask));
+    mm_end = end - 15; /* an MMX iteration reads 16 source bytes (four pixels) */
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t" /* pixel 0 */
+        "movd         4%1, %%mm3    \n\t" /* pixel 1 */
+        "punpckldq    8%1, %%mm0    \n\t" /* pixel 2 into the high dword of mm0 */
+        "punpckldq   12%1, %%mm3    \n\t" /* pixel 3 into the high dword of mm3 */
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psllq         $8, %%mm0    \n\t" /* same shift as the (rgb&0xF8)<<8 term of the C path */
+        "psllq         $8, %%mm3    \n\t"
+        "pand       %%mm7, %%mm0    \n\t"
+        "pand       %%mm7, %%mm3    \n\t"
+        "psrlq         $5, %%mm1    \n\t" /* same shift as the (rgb&0xFC00)>>5 term */
+        "psrlq         $5, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq        $19, %%mm2    \n\t" /* same shift as the (rgb&0xF80000)>>19 term */
+        "psrlq        $19, %%mm5    \n\t"
+        "pand          %2, %%mm2    \n\t" /* blue_16mask */
+        "pand          %2, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit lane */
+        "por        %%mm3, %%mm0    \n\t" /* combine with pixels 0 and 2 */
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4; /* d is uint16_t*: four pixels written */
+        s += 16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 4 bytes whenever s < end, so a src_size that is not a multiple of 4 reads past end */
+    {
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); /* bits 3..7 -> 11..15, bits 10..15 -> 5..10, bits 19..23 -> 0..4; bits 24..31 are ignored */
+    }
+}
+
+static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 32-bit source word into one 16-bit destination word holding three 5-bit fields (bit 15 stays 0 in the C path); src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    mm_end = end - 15; /* an MMX iteration reads 16 source bytes (four pixels) */
+#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
+    __asm__ volatile(
+    "movq           %3, %%mm5   \n\t" /* mask3215g */
+    "movq           %4, %%mm6   \n\t" /* mask3216br */
+    "movq           %5, %%mm7   \n\t" /* mul3215 */
+    "jmp            2f          \n\t" /* enter at the loop test so that zero iterations are possible */
+    ASMALIGN(4)
+    "1:                         \n\t"
+    PREFETCH"   32(%1)          \n\t"
+    "movd         (%1), %%mm0   \n\t" /* pixel 0 */
+    "movd        4(%1), %%mm3   \n\t" /* pixel 1 */
+    "punpckldq   8(%1), %%mm0   \n\t" /* pixel 2 into the high dword of mm0 */
+    "punpckldq  12(%1), %%mm3   \n\t" /* pixel 3 into the high dword of mm3 */
+    "movq        %%mm0, %%mm1   \n\t"
+    "movq        %%mm3, %%mm4   \n\t"
+    "pand        %%mm6, %%mm0   \n\t"
+    "pand        %%mm6, %%mm3   \n\t"
+    "pmaddwd     %%mm7, %%mm0   \n\t" /* multiply the masked 16-bit words by mul3215 and add each pair into a dword */
+    "pmaddwd     %%mm7, %%mm3   \n\t"
+    "pand        %%mm5, %%mm1   \n\t"
+    "pand        %%mm5, %%mm4   \n\t"
+    "por         %%mm1, %%mm0   \n\t"
+    "por         %%mm4, %%mm3   \n\t"
+    "psrld          $6, %%mm0   \n\t" /* presumably aligns pixels 0 and 2 to the low word of each dword - depends on constants not visible here */
+    "pslld         $10, %%mm3   \n\t" /* presumably aligns pixels 1 and 3 to the high word of each dword */
+    "por         %%mm3, %%mm0   \n\t"
+    MOVNTQ"      %%mm0, (%0)    \n\t"
+    "add           $16,  %1     \n\t" /* 16 source bytes consumed */
+    "add            $8,  %0     \n\t" /* 8 destination bytes written */
+    "2:                         \n\t"
+    "cmp            %2,  %1     \n\t"
+    " jb            1b          \n\t" /* loop while s is below mm_end (unsigned compare) */
+    : "+r" (d), "+r"(s)
+    : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
+    );
+#else /* shift-and-mask variant, compiled out by the "#if 1" above */
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t"
+        "movq          %1, %%mm6    \n\t"
+        ::"m"(red_15mask),"m"(green_15mask));
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t"
+        "movd         4%1, %%mm3    \n\t"
+        "punpckldq    8%1, %%mm0    \n\t"
+        "punpckldq   12%1, %%mm3    \n\t"
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psrlq         $3, %%mm0    \n\t"
+        "psrlq         $3, %%mm3    \n\t"
+        "pand          %2, %%mm0    \n\t"
+        "pand          %2, %%mm3    \n\t"
+        "psrlq         $6, %%mm1    \n\t"
+        "psrlq         $6, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq         $9, %%mm2    \n\t"
+        "psrlq         $9, %%mm5    \n\t"
+        "pand       %%mm7, %%mm2    \n\t"
+        "pand       %%mm7, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t"
+        "por        %%mm3, %%mm0    \n\t"
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4;
+        s += 16;
+    }
+#endif /* #if 1 */
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 4 bytes whenever s < end, so a src_size that is not a multiple of 4 reads past end */
+    {
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); /* bits 3..7 -> 0..4, bits 11..15 -> 5..9, bits 19..23 -> 10..14; bits 24..31 are ignored */
+    }
+}
+
+static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 32-bit source word into one 16-bit destination word of three 5-bit fields, with the first and third source bytes swapped relative to rgb32to15; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t" /* red_15mask */
+        "movq          %1, %%mm6    \n\t" /* green_15mask */
+        ::"m"(red_15mask),"m"(green_15mask));
+    mm_end = end - 15; /* an MMX iteration reads 16 source bytes (four pixels) */
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t" /* pixel 0 */
+        "movd         4%1, %%mm3    \n\t" /* pixel 1 */
+        "punpckldq    8%1, %%mm0    \n\t" /* pixel 2 into the high dword of mm0 */
+        "punpckldq   12%1, %%mm3    \n\t" /* pixel 3 into the high dword of mm3 */
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psllq         $7, %%mm0    \n\t" /* same shift as the (rgb&0xF8)<<7 term of the C path */
+        "psllq         $7, %%mm3    \n\t"
+        "pand       %%mm7, %%mm0    \n\t"
+        "pand       %%mm7, %%mm3    \n\t"
+        "psrlq         $6, %%mm1    \n\t" /* same shift as the (rgb&0xF800)>>6 term */
+        "psrlq         $6, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq        $19, %%mm2    \n\t" /* same shift as the (rgb&0xF80000)>>19 term */
+        "psrlq        $19, %%mm5    \n\t"
+        "pand          %2, %%mm2    \n\t" /* blue_15mask */
+        "pand          %2, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit lane */
+        "por        %%mm3, %%mm0    \n\t" /* combine with pixels 0 and 2 */
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4; /* d is uint16_t*: four pixels written */
+        s += 16;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 4 bytes whenever s < end, so a src_size that is not a multiple of 4 reads past end */
+    {
+        register int rgb = *(const uint32_t*)s; s += 4;
+        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); /* bits 3..7 -> 10..14, bits 11..15 -> 5..9, bits 19..23 -> 0..4; bits 24..31 are ignored */
+    }
+}
+
+static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit destination word: first byte -> bits 0..4, second -> bits 5..10, third -> bits 11..15; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq         %0, %%mm7     \n\t" /* red_16mask */
+        "movq         %1, %%mm6     \n\t" /* green_16mask */
+        ::"m"(red_16mask),"m"(green_16mask));
+    mm_end = end - 12; /* an iteration consumes 12 bytes but its last 4-byte load (offset 9) touches source byte 12, so at least 13 bytes must remain; the C loop below converts the rest */
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t" /* pixel 0 plus the first byte of pixel 1 */
+        "movd         3%1, %%mm3    \n\t" /* pixel 1 */
+        "punpckldq    6%1, %%mm0    \n\t" /* pixel 2 into the high dword of mm0 */
+        "punpckldq    9%1, %%mm3    \n\t" /* pixel 3 into the high dword of mm3; reads source bytes 9..12 */
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psrlq         $3, %%mm0    \n\t" /* same shift as the b>>3 term of the C path */
+        "psrlq         $3, %%mm3    \n\t"
+        "pand          %2, %%mm0    \n\t" /* blue_16mask */
+        "pand          %2, %%mm3    \n\t"
+        "psrlq         $5, %%mm1    \n\t" /* second byte (bits 8..15) down by 5, like (g&0xFC)<<3 on the isolated byte */
+        "psrlq         $5, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq         $8, %%mm2    \n\t" /* third byte (bits 16..23) down by 8, like (r&0xF8)<<8 on the isolated byte */
+        "psrlq         $8, %%mm5    \n\t"
+        "pand       %%mm7, %%mm2    \n\t"
+        "pand       %%mm7, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit lane */
+        "por        %%mm3, %%mm0    \n\t" /* combine with pixels 0 and 2 */
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4; /* d is uint16_t*: four pixels written */
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 3 bytes whenever s < end, so a src_size that is not a multiple of 3 reads past end */
+    {
+        const int b = *s++;
+        const int g = *s++;
+        const int r = *s++;
+        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
+    }
+}
+
+static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit destination word: first byte -> bits 11..15, second -> bits 5..10, third -> bits 0..4; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq         %0, %%mm7     \n\t" /* red_16mask */
+        "movq         %1, %%mm6     \n\t" /* green_16mask */
+        ::"m"(red_16mask),"m"(green_16mask));
+    mm_end = end - 15; /* conservative bound: an iteration consumes 12 bytes and reads source bytes 0..12 */
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t" /* pixel 0 plus the first byte of pixel 1 */
+        "movd         3%1, %%mm3    \n\t" /* pixel 1 */
+        "punpckldq    6%1, %%mm0    \n\t" /* pixel 2 into the high dword of mm0 */
+        "punpckldq    9%1, %%mm3    \n\t" /* pixel 3 into the high dword of mm3; reads source bytes 9..12 */
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psllq         $8, %%mm0    \n\t" /* same shift as the (r&0xF8)<<8 term of the C path */
+        "psllq         $8, %%mm3    \n\t"
+        "pand       %%mm7, %%mm0    \n\t"
+        "pand       %%mm7, %%mm3    \n\t"
+        "psrlq         $5, %%mm1    \n\t" /* second byte (bits 8..15) down by 5, like (g&0xFC)<<3 on the isolated byte */
+        "psrlq         $5, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq        $19, %%mm2    \n\t" /* third byte (bits 16..23) down by 19, like b>>3 on the isolated byte */
+        "psrlq        $19, %%mm5    \n\t"
+        "pand          %2, %%mm2    \n\t" /* blue_16mask */
+        "pand          %2, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit lane */
+        "por        %%mm3, %%mm0    \n\t" /* combine with pixels 0 and 2 */
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+        d += 4; /* d is uint16_t*: four pixels written */
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 3 bytes whenever s < end, so a src_size that is not a multiple of 3 reads past end */
+    {
+        const int r = *s++;
+        const int g = *s++;
+        const int b = *s++;
+        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
+    }
+}
+
+static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit destination word: first byte -> bits 0..4, second -> bits 5..9, third -> bits 10..14; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq          %0, %%mm7    \n\t" /* red_15mask */
+        "movq          %1, %%mm6    \n\t" /* green_15mask */
+        ::"m"(red_15mask),"m"(green_15mask));
+    mm_end = end - 12; /* an iteration consumes 12 bytes but its last 4-byte load (offset 9) touches source byte 12, so at least 13 bytes must remain; the C loop below converts the rest */
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movd          %1, %%mm0    \n\t" /* pixel 0 plus the first byte of pixel 1 */
+        "movd         3%1, %%mm3    \n\t" /* pixel 1 */
+        "punpckldq    6%1, %%mm0    \n\t" /* pixel 2 into the high dword of mm0 */
+        "punpckldq    9%1, %%mm3    \n\t" /* pixel 3 into the high dword of mm3; reads source bytes 9..12 */
+        "movq       %%mm0, %%mm1    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm3, %%mm4    \n\t"
+        "movq       %%mm3, %%mm5    \n\t"
+        "psrlq         $3, %%mm0    \n\t" /* same shift as the b>>3 term of the C path */
+        "psrlq         $3, %%mm3    \n\t"
+        "pand          %2, %%mm0    \n\t" /* blue_15mask */
+        "pand          %2, %%mm3    \n\t"
+        "psrlq         $6, %%mm1    \n\t" /* second byte (bits 8..15) down by 6, like (g&0xF8)<<2 on the isolated byte */
+        "psrlq         $6, %%mm4    \n\t"
+        "pand       %%mm6, %%mm1    \n\t"
+        "pand       %%mm6, %%mm4    \n\t"
+        "psrlq         $9, %%mm2    \n\t" /* third byte (bits 16..23) down by 9, like (r&0xF8)<<7 on the isolated byte */
+        "psrlq         $9, %%mm5    \n\t"
+        "pand       %%mm7, %%mm2    \n\t"
+        "pand       %%mm7, %%mm5    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "psllq        $16, %%mm3    \n\t" /* move pixels 1 and 3 up by one 16-bit lane */
+        "por        %%mm3, %%mm0    \n\t" /* combine with pixels 0 and 2 */
+        MOVNTQ"     %%mm0, %0       \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4; /* d is uint16_t*: four pixels written */
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 3 bytes whenever s < end, so a src_size that is not a multiple of 3 reads past end */
+    {
+        const int b = *s++;
+        const int g = *s++;
+        const int r = *s++;
+        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
+    }
+}
+
+static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Packs every 3-byte source pixel into one 16-bit destination word: first byte -> bits 10..14, second -> bits 5..9, third -> bits 0..4; src_size is the source length in bytes. */
+    const uint8_t *s = src;
+    const uint8_t *end;
+#if HAVE_MMX
+    const uint8_t *mm_end;
+#endif
+    uint16_t *d = (uint16_t *)dst;
+    end = s + src_size; /* one past the last source byte */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
+    __asm__ volatile(
+        "movq         %0, %%mm7     \n\t" /* red_15mask */
+        "movq         %1, %%mm6     \n\t" /* green_15mask */
+        ::"m"(red_15mask),"m"(green_15mask));
+    mm_end = end - 15; /* conservative bound: an iteration consumes 12 bytes and reads source bytes 0..12 */
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"   32%1            \n\t"
+        "movd         %1, %%mm0     \n\t" /* pixel 0 plus the first byte of pixel 1 */
+        "movd        3%1, %%mm3     \n\t" /* pixel 1 */
+        "punpckldq   6%1, %%mm0     \n\t" /* pixel 2 into the high dword of mm0 */
+        "punpckldq   9%1, %%mm3     \n\t" /* pixel 3 into the high dword of mm3; reads source bytes 9..12 */
+        "movq      %%mm0, %%mm1     \n\t"
+        "movq      %%mm0, %%mm2     \n\t"
+        "movq      %%mm3, %%mm4     \n\t"
+        "movq      %%mm3, %%mm5     \n\t"
+        "psllq        $7, %%mm0     \n\t" /* same shift as the (r&0xF8)<<7 term of the C path */
+        "psllq        $7, %%mm3     \n\t"
+        "pand      %%mm7, %%mm0     \n\t"
+        "pand      %%mm7, %%mm3     \n\t"
+        "psrlq        $6, %%mm1     \n\t" /* second byte (bits 8..15) down by 6, like (g&0xF8)<<2 on the isolated byte */
+        "psrlq        $6, %%mm4     \n\t"
+        "pand      %%mm6, %%mm1     \n\t"
+        "pand      %%mm6, %%mm4     \n\t"
+        "psrlq       $19, %%mm2     \n\t" /* third byte (bits 16..23) down by 19, like b>>3 on the isolated byte */
+        "psrlq       $19, %%mm5     \n\t"
+        "pand         %2, %%mm2     \n\t" /* blue_15mask */
+        "pand         %2, %%mm5     \n\t"
+        "por       %%mm1, %%mm0     \n\t"
+        "por       %%mm4, %%mm3     \n\t"
+        "por       %%mm2, %%mm0     \n\t"
+        "por       %%mm5, %%mm3     \n\t"
+        "psllq       $16, %%mm3     \n\t" /* move pixels 1 and 3 up by one 16-bit lane */
+        "por       %%mm3, %%mm0     \n\t" /* combine with pixels 0 and 2 */
+        MOVNTQ"    %%mm0, %0        \n\t"
+        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+        d += 4; /* d is uint16_t*: four pixels written */
+        s += 12;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left; NOTE(review): reads 3 bytes whenever s < end, so a src_size that is not a multiple of 3 reads past end */
+    {
+        const int r = *s++;
+        const int g = *s++;
+        const int b = *s++;
+        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
+    }
+}
+
+/*
+  I use a less accurate approximation here by simply left-shifting the input
+  value and filling the low order bits with zeroes. This method improves PNG
+  compression but this scheme cannot reproduce white exactly, since it does
+  not generate an all-ones maximum value; the net effect is to darken the
+  image slightly.
+
+  A better method would be "left bit replication":
+
+   4 3 2 1 0
+   ---------
+   1 1 0 1 1
+
+   7 6 5 4 3  2 1 0
+   ----------------
+   1 1 0 1 1  1 1 0
+   |=======|  |===|
+       |      leftmost bits repeated to fill open bits
+       |
+   original bits
+*/
+static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Expands every 16-bit source pixel into three destination bytes taken from bits 0..4, 5..9 and 10..14 (in that order), each left-aligned in its byte; src_size is the source length in bytes. */
+    const uint16_t *end;
+#if HAVE_MMX
+    const uint16_t *mm_end;
+#endif
+    uint8_t *d = dst;
+    const uint16_t *s = (const uint16_t*)src;
+    end = s + src_size/2; /* s counts 16-bit pixels, src_size counts bytes */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 7; /* an MMX iteration reads 8 pixels (16 bytes) */
+    while (s < mm_end)
+    {
+        __asm__ volatile( /* stage 1: widen 8 pixels to one dword each; NOTE(review): operand %0 is declared but not referenced in this statement */
+        PREFETCH"    32%1           \n\t"
+        "movq          %1, %%mm0    \n\t" /* pixels 0..3, three copies (one per component) */
+        "movq          %1, %%mm1    \n\t"
+        "movq          %1, %%mm2    \n\t"
+        "pand          %2, %%mm0    \n\t" /* mask15b */
+        "pand          %3, %%mm1    \n\t" /* mask15g */
+        "pand          %4, %%mm2    \n\t" /* mask15r */
+        "psllq         $3, %%mm0    \n\t" /* same shifts as the C path below: <<3, >>2, >>7 */
+        "psrlq         $2, %%mm1    \n\t"
+        "psrlq         $7, %%mm2    \n\t"
+        "movq       %%mm0, %%mm3    \n\t"
+        "movq       %%mm1, %%mm4    \n\t"
+        "movq       %%mm2, %%mm5    \n\t"
+        "punpcklwd     %5, %%mm0    \n\t" /* interleave the low two words with mmx_null (presumably all zero - defined elsewhere) */
+        "punpcklwd     %5, %%mm1    \n\t"
+        "punpcklwd     %5, %%mm2    \n\t"
+        "punpckhwd     %5, %%mm3    \n\t" /* same for the high two words */
+        "punpckhwd     %5, %%mm4    \n\t"
+        "punpckhwd     %5, %%mm5    \n\t"
+        "psllq         $8, %%mm1    \n\t" /* second component to byte 1 of each dword */
+        "psllq        $16, %%mm2    \n\t" /* third component to byte 2 of each dword */
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "psllq         $8, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+
+        "movq       %%mm0, %%mm6    \n\t" /* park pixels 0,1 in mm6 */
+        "movq       %%mm3, %%mm7    \n\t" /* park pixels 2,3 in mm7 */
+
+        "movq         8%1, %%mm0    \n\t" /* pixels 4..7, processed exactly like the first four */
+        "movq         8%1, %%mm1    \n\t"
+        "movq         8%1, %%mm2    \n\t"
+        "pand          %2, %%mm0    \n\t"
+        "pand          %3, %%mm1    \n\t"
+        "pand          %4, %%mm2    \n\t"
+        "psllq         $3, %%mm0    \n\t"
+        "psrlq         $2, %%mm1    \n\t"
+        "psrlq         $7, %%mm2    \n\t"
+        "movq       %%mm0, %%mm3    \n\t"
+        "movq       %%mm1, %%mm4    \n\t"
+        "movq       %%mm2, %%mm5    \n\t"
+        "punpcklwd     %5, %%mm0    \n\t"
+        "punpcklwd     %5, %%mm1    \n\t"
+        "punpcklwd     %5, %%mm2    \n\t"
+        "punpckhwd     %5, %%mm3    \n\t"
+        "punpckhwd     %5, %%mm4    \n\t"
+        "punpckhwd     %5, %%mm5    \n\t"
+        "psllq         $8, %%mm1    \n\t"
+        "psllq        $16, %%mm2    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "psllq         $8, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+
+        :"=m"(*d)
+        :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+        :"memory");
+        /* borrowed 32 to 24 */
+        __asm__ volatile( /* stage 2: squeeze the 8 dwords left in mm6, mm7, mm0, mm3 into 24 bytes; NOTE(review): relies on MMX register contents surviving between the two asm statements, which are not declared as operands or clobbers */
+        "movq       %%mm0, %%mm4    \n\t" /* pixels 4,5 */
+        "movq       %%mm3, %%mm5    \n\t" /* pixels 6,7 */
+        "movq       %%mm6, %%mm0    \n\t" /* pixels 0,1 */
+        "movq       %%mm7, %%mm1    \n\t" /* pixels 2,3 */
+
+        "movq       %%mm4, %%mm6    \n\t"
+        "movq       %%mm5, %%mm7    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm1, %%mm3    \n\t"
+
+        "psrlq         $8, %%mm2    \n\t"
+        "psrlq         $8, %%mm3    \n\t"
+        "psrlq         $8, %%mm6    \n\t"
+        "psrlq         $8, %%mm7    \n\t"
+        "pand          %2, %%mm0    \n\t" /* mask24l */
+        "pand          %2, %%mm1    \n\t"
+        "pand          %2, %%mm4    \n\t"
+        "pand          %2, %%mm5    \n\t"
+        "pand          %3, %%mm2    \n\t" /* mask24h */
+        "pand          %3, %%mm3    \n\t"
+        "pand          %3, %%mm6    \n\t"
+        "pand          %3, %%mm7    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm3, %%mm1    \n\t"
+        "por        %%mm6, %%mm4    \n\t"
+        "por        %%mm7, %%mm5    \n\t"
+
+        "movq       %%mm1, %%mm2    \n\t"
+        "movq       %%mm4, %%mm3    \n\t"
+        "psllq        $48, %%mm2    \n\t"
+        "psllq        $32, %%mm3    \n\t"
+        "pand          %4, %%mm2    \n\t" /* mask24hh */
+        "pand          %5, %%mm3    \n\t" /* mask24hhh */
+        "por        %%mm2, %%mm0    \n\t"
+        "psrlq        $16, %%mm1    \n\t"
+        "psrlq        $32, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm3, %%mm1    \n\t"
+        "pand          %6, %%mm5    \n\t" /* mask24hhhh */
+        "por        %%mm5, %%mm4    \n\t"
+
+        MOVNTQ"     %%mm0,   %0     \n\t" /* 3 x 8 = 24 output bytes */
+        MOVNTQ"     %%mm1,  8%0     \n\t"
+        MOVNTQ"     %%mm4, 16%0"
+
+        :"=m"(*d)
+        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        :"memory");
+        d += 24;
+        s += 8; /* s is uint16_t*: eight pixels consumed */
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left */
+    {
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x1F)<<3; /* bits 0..4 -> bits 3..7 of the first byte; the low 3 bits stay 0 */
+        *d++ = (bgr&0x3E0)>>2; /* bits 5..9 -> bits 3..7 of the second byte */
+        *d++ = (bgr&0x7C00)>>7; /* bits 10..14 -> bits 3..7 of the third byte */
+    }
+}
+
+static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{ /* Expands every 16-bit source pixel into three destination bytes taken from bits 0..4, 5..10 and 11..15 (in that order), each left-aligned in its byte; src_size is the source length in bytes. */
+    const uint16_t *end;
+#if HAVE_MMX
+    const uint16_t *mm_end;
+#endif
+    uint8_t *d = (uint8_t *)dst;
+    const uint16_t *s = (const uint16_t *)src;
+    end = s + src_size/2; /* s counts 16-bit pixels, src_size counts bytes */
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    mm_end = end - 7; /* an MMX iteration reads 8 pixels (16 bytes) */
+    while (s < mm_end)
+    {
+        __asm__ volatile( /* stage 1: widen 8 pixels to one dword each; NOTE(review): operand %0 is declared but not referenced in this statement */
+        PREFETCH"    32%1           \n\t"
+        "movq          %1, %%mm0    \n\t" /* pixels 0..3, three copies (one per component) */
+        "movq          %1, %%mm1    \n\t"
+        "movq          %1, %%mm2    \n\t"
+        "pand          %2, %%mm0    \n\t" /* mask16b */
+        "pand          %3, %%mm1    \n\t" /* mask16g */
+        "pand          %4, %%mm2    \n\t" /* mask16r */
+        "psllq         $3, %%mm0    \n\t" /* same shifts as the C path below: <<3, >>3, >>8 */
+        "psrlq         $3, %%mm1    \n\t"
+        "psrlq         $8, %%mm2    \n\t"
+        "movq       %%mm0, %%mm3    \n\t"
+        "movq       %%mm1, %%mm4    \n\t"
+        "movq       %%mm2, %%mm5    \n\t"
+        "punpcklwd     %5, %%mm0    \n\t" /* interleave the low two words with mmx_null (presumably all zero - defined elsewhere) */
+        "punpcklwd     %5, %%mm1    \n\t"
+        "punpcklwd     %5, %%mm2    \n\t"
+        "punpckhwd     %5, %%mm3    \n\t" /* same for the high two words */
+        "punpckhwd     %5, %%mm4    \n\t"
+        "punpckhwd     %5, %%mm5    \n\t"
+        "psllq         $8, %%mm1    \n\t" /* second component to byte 1 of each dword */
+        "psllq        $16, %%mm2    \n\t" /* third component to byte 2 of each dword */
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "psllq         $8, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+
+        "movq       %%mm0, %%mm6    \n\t" /* park pixels 0,1 in mm6 */
+        "movq       %%mm3, %%mm7    \n\t" /* park pixels 2,3 in mm7 */
+
+        "movq         8%1, %%mm0    \n\t" /* pixels 4..7, processed exactly like the first four */
+        "movq         8%1, %%mm1    \n\t"
+        "movq         8%1, %%mm2    \n\t"
+        "pand          %2, %%mm0    \n\t"
+        "pand          %3, %%mm1    \n\t"
+        "pand          %4, %%mm2    \n\t"
+        "psllq         $3, %%mm0    \n\t"
+        "psrlq         $3, %%mm1    \n\t"
+        "psrlq         $8, %%mm2    \n\t"
+        "movq       %%mm0, %%mm3    \n\t"
+        "movq       %%mm1, %%mm4    \n\t"
+        "movq       %%mm2, %%mm5    \n\t"
+        "punpcklwd     %5, %%mm0    \n\t"
+        "punpcklwd     %5, %%mm1    \n\t"
+        "punpcklwd     %5, %%mm2    \n\t"
+        "punpckhwd     %5, %%mm3    \n\t"
+        "punpckhwd     %5, %%mm4    \n\t"
+        "punpckhwd     %5, %%mm5    \n\t"
+        "psllq         $8, %%mm1    \n\t"
+        "psllq        $16, %%mm2    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "psllq         $8, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        :"=m"(*d)
+        :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+        :"memory");
+        /* borrowed 32 to 24 */
+        __asm__ volatile( /* stage 2: squeeze the 8 dwords left in mm6, mm7, mm0, mm3 into 24 bytes; NOTE(review): relies on MMX register contents surviving between the two asm statements, which are not declared as operands or clobbers */
+        "movq       %%mm0, %%mm4    \n\t" /* pixels 4,5 */
+        "movq       %%mm3, %%mm5    \n\t" /* pixels 6,7 */
+        "movq       %%mm6, %%mm0    \n\t" /* pixels 0,1 */
+        "movq       %%mm7, %%mm1    \n\t" /* pixels 2,3 */
+
+        "movq       %%mm4, %%mm6    \n\t"
+        "movq       %%mm5, %%mm7    \n\t"
+        "movq       %%mm0, %%mm2    \n\t"
+        "movq       %%mm1, %%mm3    \n\t"
+
+        "psrlq         $8, %%mm2    \n\t"
+        "psrlq         $8, %%mm3    \n\t"
+        "psrlq         $8, %%mm6    \n\t"
+        "psrlq         $8, %%mm7    \n\t"
+        "pand          %2, %%mm0    \n\t" /* mask24l */
+        "pand          %2, %%mm1    \n\t"
+        "pand          %2, %%mm4    \n\t"
+        "pand          %2, %%mm5    \n\t"
+        "pand          %3, %%mm2    \n\t" /* mask24h */
+        "pand          %3, %%mm3    \n\t"
+        "pand          %3, %%mm6    \n\t"
+        "pand          %3, %%mm7    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm3, %%mm1    \n\t"
+        "por        %%mm6, %%mm4    \n\t"
+        "por        %%mm7, %%mm5    \n\t"
+
+        "movq       %%mm1, %%mm2    \n\t"
+        "movq       %%mm4, %%mm3    \n\t"
+        "psllq        $48, %%mm2    \n\t"
+        "psllq        $32, %%mm3    \n\t"
+        "pand          %4, %%mm2    \n\t" /* mask24hh */
+        "pand          %5, %%mm3    \n\t" /* mask24hhh */
+        "por        %%mm2, %%mm0    \n\t"
+        "psrlq        $16, %%mm1    \n\t"
+        "psrlq        $32, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm3, %%mm1    \n\t"
+        "pand          %6, %%mm5    \n\t" /* mask24hhhh */
+        "por        %%mm5, %%mm4    \n\t"
+
+        MOVNTQ"     %%mm0,   %0     \n\t" /* 3 x 8 = 24 output bytes */
+        MOVNTQ"     %%mm1,  8%0     \n\t"
+        MOVNTQ"     %%mm4, 16%0"
+
+        :"=m"(*d)
+        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        :"memory");
+        d += 24;
+        s += 8; /* s is uint16_t*: eight pixels consumed */
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif /* HAVE_MMX */
+    while (s < end) /* C path for whatever the MMX loop left */
+    {
+        register uint16_t bgr;
+        bgr = *s++;
+        *d++ = (bgr&0x1F)<<3; /* bits 0..4 -> bits 3..7 of the first byte; the low 3 bits stay 0 */
+        *d++ = (bgr&0x7E0)>>3; /* bits 5..10 -> bits 2..7 of the second byte */
+        *d++ = (bgr&0xF800)>>8; /* bits 11..15 -> bits 3..7 of the third byte */
+    }
+}
+
+/**
+ * Expand 15-bit pixels (5 bits per component, masks 0x1F / 0x3E0 / 0x7C00) to 4 bytes per pixel.
+ * src_size is in bytes; the fourth output byte (the first when WORDS_BIGENDIAN is defined) is set to 255.
+ */
+static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    const uint16_t *end;
+#if HAVE_MMX
+    const uint16_t *mm_end;
+#endif
+    uint8_t *d = dst;
+    const uint16_t *s = (const uint16_t *)src;
+    end = s + src_size/2;
+#if HAVE_MMX
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
+    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t" "pslld $24,%%mm6    \n\t":::"memory"); // mm6 = 0xFF000000 per dword
+    mm_end = end - 3;
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movq          %1, %%mm0    \n\t"
+        "movq          %1, %%mm1    \n\t"
+        "movq          %1, %%mm2    \n\t"
+        "pand          %2, %%mm0    \n\t"
+        "pand          %3, %%mm1    \n\t"
+        "pand          %4, %%mm2    \n\t"
+        "psllq         $3, %%mm0    \n\t"
+        "psrlq         $2, %%mm1    \n\t"
+        "psrlq         $7, %%mm2    \n\t"
+        "movq       %%mm0, %%mm3    \n\t"
+        "movq       %%mm1, %%mm4    \n\t"
+        "movq       %%mm2, %%mm5    \n\t"
+        "punpcklwd  %%mm7, %%mm0    \n\t"
+        "punpcklwd  %%mm7, %%mm1    \n\t"
+        "punpcklwd  %%mm7, %%mm2    \n\t"
+        "punpckhwd  %%mm7, %%mm3    \n\t"
+        "punpckhwd  %%mm7, %%mm4    \n\t"
+        "punpckhwd  %%mm7, %%mm5    \n\t"
+        "psllq         $8, %%mm1    \n\t"
+        "psllq        $16, %%mm2    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm6, %%mm0    \n\t" // fourth byte = 255, same as the scalar tail below
+        "psllq         $8, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "por        %%mm6, %%mm3    \n\t" // fourth byte = 255, same as the scalar tail below
+        MOVNTQ"     %%mm0,  %0      \n\t"
+        MOVNTQ"     %%mm3, 8%0      \n\t"
+        :"=m"(*d)
+        :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+        :"memory");
+        d += 16;
+        s += 4;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end)
+    {
+        register uint16_t bgr = *s++;
+#ifdef WORDS_BIGENDIAN
+        *d++ = 255;
+        *d++ = (bgr&0x7C00)>>7;
+        *d++ = (bgr&0x3E0)>>2;
+        *d++ = (bgr&0x1F)<<3;
+#else
+        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr&0x3E0)>>2;
+        *d++ = (bgr&0x7C00)>>7;
+        *d++ = 255;
+#endif
+    }
+}
+
+/** Expand 16-bit 5-6-5 pixels (masks 0x1F / 0x7E0 / 0xF800) to 4 bytes per pixel; src_size is in bytes.
+ *  The fourth output byte (the first when WORDS_BIGENDIAN is defined) is set to 255. */
+static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    uint8_t *d = dst;
+    const uint16_t *s = (const uint16_t*)src;
+    const uint16_t *end = s + src_size/2;
+#if HAVE_MMX
+    const uint16_t *mm_end = end - 3; // the MMX loop consumes 4 pixels per pass
+    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
+    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
+    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t" "pslld $24,%%mm6    \n\t":::"memory"); // mm6 = 0xFF000000 per dword
+    while (s < mm_end)
+    {
+        __asm__ volatile(
+        PREFETCH"    32%1           \n\t"
+        "movq          %1, %%mm0    \n\t"
+        "movq          %1, %%mm1    \n\t"
+        "movq          %1, %%mm2    \n\t"
+        "pand          %2, %%mm0    \n\t"
+        "pand          %3, %%mm1    \n\t"
+        "pand          %4, %%mm2    \n\t"
+        "psllq         $3, %%mm0    \n\t"
+        "psrlq         $3, %%mm1    \n\t"
+        "psrlq         $8, %%mm2    \n\t"
+        "movq       %%mm0, %%mm3    \n\t"
+        "movq       %%mm1, %%mm4    \n\t"
+        "movq       %%mm2, %%mm5    \n\t"
+        "punpcklwd  %%mm7, %%mm0    \n\t"
+        "punpcklwd  %%mm7, %%mm1    \n\t"
+        "punpcklwd  %%mm7, %%mm2    \n\t"
+        "punpckhwd  %%mm7, %%mm3    \n\t"
+        "punpckhwd  %%mm7, %%mm4    \n\t"
+        "punpckhwd  %%mm7, %%mm5    \n\t"
+        "psllq         $8, %%mm1    \n\t"
+        "psllq        $16, %%mm2    \n\t"
+        "por        %%mm1, %%mm0    \n\t"
+        "por        %%mm2, %%mm0    \n\t"
+        "por        %%mm6, %%mm0    \n\t" // fourth byte = 255, same as the scalar tail below
+        "psllq         $8, %%mm4    \n\t"
+        "psllq        $16, %%mm5    \n\t"
+        "por        %%mm4, %%mm3    \n\t"
+        "por        %%mm5, %%mm3    \n\t"
+        "por        %%mm6, %%mm3    \n\t" // fourth byte = 255, same as the scalar tail below
+        MOVNTQ"     %%mm0, %0       \n\t"
+        MOVNTQ"     %%mm3, 8%0      \n\t"
+        :"=m"(*d)
+        :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+        :"memory");
+        d += 16;
+        s += 4;
+    }
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    while (s < end)
+    {
+        register uint16_t bgr = *s++;
+#ifdef WORDS_BIGENDIAN
+        *d++ = 255;
+        *d++ = (bgr&0xF800)>>8;
+        *d++ = (bgr&0x7E0)>>3;
+        *d++ = (bgr&0x1F)<<3;
+#else
+        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr&0x7E0)>>3;
+        *d++ = (bgr&0xF800)>>8;
+        *d++ = 255;
+#endif
+    }
+}
+
+static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+{   /* Exchange bits 0-7 with bits 16-23 of every 32-bit pixel; src_size is in bytes. */
+    long idx = 15 - src_size;       // negative for as long as at least 16 bytes remain
+    const uint8_t *s = src-idx;     // biased so that s[idx] initially addresses src[0]
+    uint8_t *d = dst-idx;
+#if HAVE_MMX
+    __asm__ volatile(
+    "test          %0, %0           \n\t"
+    "jns           2f               \n\t" // fewer than 16 bytes: everything is left to the C loop
+    PREFETCH"       (%1, %0)        \n\t"
+    "movq          %3, %%mm7        \n\t"
+    "pxor          %4, %%mm7        \n\t"
+    "movq       %%mm7, %%mm6        \n\t" // mm6 = mask32b ^ mask32r (presumably the two swapped bytes)
+    "pxor          %5, %%mm7        \n\t" // mm7 = mm6 ^ mmx_one (presumably the bytes kept in place)
+    ASMALIGN(4)
+    "1:                             \n\t"
+    PREFETCH"     32(%1, %0)        \n\t"
+    "movq           (%1, %0), %%mm0 \n\t"
+    "movq          8(%1, %0), %%mm1 \n\t"
+# if HAVE_MMX2
+    "pshufw      $177, %%mm0, %%mm3 \n\t"
+    "pshufw      $177, %%mm1, %%mm5 \n\t"
+    "pand       %%mm7, %%mm0        \n\t"
+    "pand       %%mm6, %%mm3        \n\t"
+    "pand       %%mm7, %%mm1        \n\t"
+    "pand       %%mm6, %%mm5        \n\t"
+    "por        %%mm3, %%mm0        \n\t"
+    "por        %%mm5, %%mm1        \n\t"
+# else
+    "movq       %%mm0, %%mm2        \n\t"
+    "movq       %%mm1, %%mm4        \n\t"
+    "pand       %%mm7, %%mm0        \n\t"
+    "pand       %%mm6, %%mm2        \n\t"
+    "pand       %%mm7, %%mm1        \n\t"
+    "pand       %%mm6, %%mm4        \n\t"
+    "movq       %%mm2, %%mm3        \n\t"
+    "movq       %%mm4, %%mm5        \n\t"
+    "pslld        $16, %%mm2        \n\t"
+    "psrld        $16, %%mm3        \n\t"
+    "pslld        $16, %%mm4        \n\t"
+    "psrld        $16, %%mm5        \n\t"
+    "por        %%mm2, %%mm0        \n\t"
+    "por        %%mm4, %%mm1        \n\t"
+    "por        %%mm3, %%mm0        \n\t"
+    "por        %%mm5, %%mm1        \n\t"
+# endif
+    MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
+    MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
+    "add          $16, %0           \n\t"
+    "js            1b               \n\t"
+    SFENCE"                         \n\t"
+    EMMS"                           \n\t"
+    "2:                             \n\t"
+    : "+&r"(idx)
+    : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
+    : "memory");
+#endif
+    for (; idx<15; idx+=4) {        // remaining pixels (all of them without MMX)
+        register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; // unsigned: v<<16 must not overflow a signed int
+        v &= 0xff00ff;
+        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
+    }
+}
+
+/**
+ * Exchange the first and third byte of every 3-byte pixel, copying the middle byte; src_size is in bytes.
+ */
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+{
+    unsigned i;
+#if HAVE_MMX
+    long mmx_size= 23 - src_size; // negative for as long as at least 24 bytes remain
+    __asm__ volatile (
+    "test             %%"REG_a", %%"REG_a"          \n\t"
+    "jns                     2f                     \n\t"
+    "movq     "MANGLE(mask24r)", %%mm5              \n\t"
+    "movq     "MANGLE(mask24g)", %%mm6              \n\t"
+    "movq     "MANGLE(mask24b)", %%mm7              \n\t"
+    ASMALIGN(4)
+    "1:                                             \n\t"
+    PREFETCH" 32(%1, %%"REG_a")                     \n\t"
+    "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
+    "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
+    "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
+    "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
+    "pand                 %%mm5, %%mm0              \n\t"
+    "pand                 %%mm6, %%mm1              \n\t"
+    "pand                 %%mm7, %%mm2              \n\t"
+    "por                  %%mm0, %%mm1              \n\t"
+    "por                  %%mm2, %%mm1              \n\t"
+    "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
+    MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
+    "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
+    "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
+    "pand                 %%mm7, %%mm0              \n\t"
+    "pand                 %%mm5, %%mm1              \n\t"
+    "pand                 %%mm6, %%mm2              \n\t"
+    "por                  %%mm0, %%mm1              \n\t"
+    "por                  %%mm2, %%mm1              \n\t"
+    "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
+    MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
+    "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
+    "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
+    "pand                 %%mm6, %%mm0              \n\t"
+    "pand                 %%mm7, %%mm1              \n\t"
+    "pand                 %%mm5, %%mm2              \n\t"
+    "por                  %%mm0, %%mm1              \n\t"
+    "por                  %%mm2, %%mm1              \n\t"
+    MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
+    "add                    $24, %%"REG_a"          \n\t"
+    " js                     1b                     \n\t"
+    "2:                                             \n\t"
+    : "+a" (mmx_size)
+    : "r" (src-mmx_size), "r"(dst-mmx_size)
+    : "memory"); // src is read and dst written through the register operands, so memory must be clobbered
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+    if (mmx_size==23) return; //finished, src_size was a multiple of 24
+    src+= src_size;
+    dst+= src_size;
+    src_size= 23-mmx_size; // bytes the 24-byte MMX loop left over
+    src-= src_size;
+    dst-= src_size;
+#endif
+    for (i=0; i<src_size; i+=3)
+    {
+        register uint8_t x;
+        x          = src[i + 2];
+        dst[i + 1] = src[i + 1];
+        dst[i + 2] = src[i + 0];
+        dst[i + 0] = x;
+    }
+}
+
+/**
+ * Interleave planar Y/U/V into packed YUY2 (bytes Y0 U Y1 V); width is in luma pixels and the chroma
+ * pointers advance once every vertLumPerChroma luma lines (used as a bit mask: must be a power of two).
+ */
+static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                           long width, long height,
+                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+{
+    long y;
+    const long chromWidth= width>>1;
+    for (y=0; y<height; y++)
+    {
+#if HAVE_MMX
+//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
+        __asm__ volatile(
+        "xor                 %%"REG_a", %%"REG_a"   \n\t"
+        ASMALIGN(4)
+        "1:                                         \n\t"
+        PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
+        PREFETCH"    32(%2, %%"REG_a")              \n\t"
+        PREFETCH"    32(%3, %%"REG_a")              \n\t"
+        "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
+        "movq                    %%mm0, %%mm2       \n\t" // U(0)
+        "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
+        "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
+        "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
+
+        "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
+        "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
+        "movq                    %%mm3, %%mm4       \n\t" // Y(0)
+        "movq                    %%mm5, %%mm6       \n\t" // Y(8)
+        "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
+        "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
+        "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
+        "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
+
+        MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
+        MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
+        MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
+        MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
+
+        "add                        $8, %%"REG_a"   \n\t"
+        "cmp                        %4, %%"REG_a"   \n\t"
+        " jb                        1b              \n\t"
+        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
+        : "memory", "%"REG_a // dst is written through a register operand, so memory must be clobbered
+        );
+#else
+#if ARCH_ALPHA && HAVE_MVI
+#define pl2yuy2(n)                  \
+    y1 = yc[n];                     \
+    y2 = yc2[n];                    \
+    u = uc[n];                      \
+    v = vc[n];                      \
+    __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1));  \
+    __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2));  \
+    __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u));    \
+    __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v));    \
+    yuv1 = (u << 8) + (v << 24);                \
+    yuv2 = yuv1 + y2;               \
+    yuv1 += y1;                     \
+    qdst[n]  = yuv1;                \
+    qdst2[n] = yuv2;
+
+        int i;
+        uint64_t *qdst = (uint64_t *) dst;
+        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
+        const uint32_t *yc = (uint32_t *) ysrc;
+        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
+        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
+        for (i = 0; i < chromWidth; i += 8){
+            uint64_t y1, y2, yuv1, yuv2;
+            uint64_t u, v;
+            /* Prefetch */
+            __asm__("ldq $31,64(%0)" :: "r"(yc));
+            __asm__("ldq $31,64(%0)" :: "r"(yc2));
+            __asm__("ldq $31,64(%0)" :: "r"(uc));
+            __asm__("ldq $31,64(%0)" :: "r"(vc));
+
+            pl2yuy2(0);
+            pl2yuy2(1);
+            pl2yuy2(2);
+            pl2yuy2(3);
+            yc    += 4;
+            yc2   += 4;
+            uc    += 4;
+            vc    += 4;
+            qdst  += 4;
+            qdst2 += 4;
+        }
+        y++; // this path emits two luma lines (sharing one chroma line) per loop iteration
+        ysrc += lumStride;
+        dst += dstStride;
+#elif HAVE_FAST_64BIT
+        int i;
+        uint64_t *ldst = (uint64_t *) dst;
+        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
+        for (i = 0; i < chromWidth; i += 2){
+            uint64_t k, l;
+            /* The top byte is shifted as unsigned: a signed int would go negative for chroma >= 128 and be sign-extended into the 64-bit value. */
+            k = yc[0] + (uc[0] << 8) +
+                (yc[1] << 16) + ((unsigned) vc[0] << 24);
+            l = yc[2] + (uc[1] << 8) +
+                (yc[3] << 16) + ((unsigned) vc[1] << 24);
+            *ldst++ = k + (l << 32); // low byte first: yields Y0 U Y1 V order on little-endian hosts
+            yc += 4;
+            uc += 2;
+            vc += 2;
+        }
+#else
+        int i, *idst = (int32_t *) dst;
+        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
+        for (i = 0; i < chromWidth; i++){
+#ifdef WORDS_BIGENDIAN
+            *idst++ = ((unsigned) yc[0] << 24)+ (uc[0] << 16) +
+                (yc[1] << 8) + (vc[0] << 0);
+#else
+            *idst++ = yc[0] + (uc[0] << 8) +
+                (yc[1] << 16) + ((unsigned) vc[0] << 24);
+#endif
+            yc += 2;
+            uc++;
+            vc++;
+        }
+#endif
+#endif
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
+        {
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst  += dstStride;
+    }
+#if HAVE_MMX
+__asm__(    EMMS"       \n\t"
+        SFENCE"     \n\t"
+        :::"memory");
+#endif
+}
+
+/** Wrapper around yuvPlanartoyuy2 with vertLumPerChroma = 2: each chroma line serves two luma lines.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ */
+static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long dstStride)
+{
+    //FIXME interpolate chroma (each chroma line is currently reused unchanged for both luma lines)
+    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
+}
+
+/** Interleave planar Y/U/V into packed UYVY (bytes U Y0 V Y1); width is in luma pixels and the chroma
+ *  pointers advance once every vertLumPerChroma luma lines (used as a bit mask: must be a power of two).
+ */
+static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                           long width, long height,
+                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+{
+    long y;
+    const long chromWidth= width>>1;
+    for (y=0; y<height; y++)
+    {
+#if HAVE_MMX
+//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
+        __asm__ volatile(
+        "xor                %%"REG_a", %%"REG_a"    \n\t"
+        ASMALIGN(4)
+        "1:                                         \n\t"
+        PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
+        PREFETCH"   32(%2, %%"REG_a")               \n\t"
+        PREFETCH"   32(%3, %%"REG_a")               \n\t"
+        "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
+        "movq                   %%mm0, %%mm2        \n\t" // U(0)
+        "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
+        "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
+        "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
+
+        "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
+        "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
+        "movq                   %%mm0, %%mm4        \n\t" // UVUV UVUV(0)
+        "movq                   %%mm2, %%mm6        \n\t" // UVUV UVUV(8)
+        "punpcklbw              %%mm3, %%mm0        \n\t" // UYVY UYVY(0)
+        "punpckhbw              %%mm3, %%mm4        \n\t" // UYVY UYVY(4)
+        "punpcklbw              %%mm5, %%mm2        \n\t" // UYVY UYVY(8)
+        "punpckhbw              %%mm5, %%mm6        \n\t" // UYVY UYVY(12)
+
+        MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
+        MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
+        MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
+        MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
+        "add                       $8, %%"REG_a"    \n\t"
+        "cmp                       %4, %%"REG_a"    \n\t"
+        " jb                       1b               \n\t"
+        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
+        : "memory", "%"REG_a // dst is written through a register operand, so memory must be clobbered
+        );
+#else
+//FIXME adapt the Alpha ASM code from yv12->yuy2
+#if HAVE_FAST_64BIT
+        int i;
+        uint64_t *ldst = (uint64_t *) dst;
+        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
+        for (i = 0; i < chromWidth; i += 2){
+            uint64_t k, l;
+            /* The top byte is shifted as unsigned: a signed int would go negative for luma >= 128 and be sign-extended into the 64-bit value. */
+            k = uc[0] + (yc[0] << 8) +
+                (vc[0] << 16) + ((unsigned) yc[1] << 24);
+            l = uc[1] + (yc[2] << 8) +
+                (vc[1] << 16) + ((unsigned) yc[3] << 24);
+            *ldst++ = k + (l << 32); // low byte first: yields U Y0 V Y1 order on little-endian hosts
+            yc += 4;
+            uc += 2;
+            vc += 2;
+        }
+#else
+        int i, *idst = (int32_t *) dst;
+        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
+        for (i = 0; i < chromWidth; i++){
+#ifdef WORDS_BIGENDIAN
+            *idst++ = ((unsigned) uc[0] << 24)+ (yc[0] << 16) +
+                (vc[0] << 8) + (yc[1] << 0);
+#else
+            *idst++ = uc[0] + (yc[0] << 8) +
+               (vc[0] << 16) + ((unsigned) yc[1] << 24);
+#endif
+            yc += 2;
+            uc++;
+            vc++;
+        }
+#endif
+#endif
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
+        {
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst += dstStride;
+    }
+#if HAVE_MMX
+__asm__(    EMMS"       \n\t"
+        SFENCE"     \n\t"
+        :::"memory");
+#endif
+}
+
+/** Wrapper around yuvPlanartouyvy with vertLumPerChroma = 2: each chroma line serves two luma lines.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ */
+static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long dstStride)
+{
+    //FIXME interpolate chroma (each chroma line is currently reused unchanged for both luma lines)
+    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
+}
+
+/**
+ * Wrapper around yuvPlanartouyvy with vertLumPerChroma = 1 (one chroma line per luma line). Width should be a multiple of 16.
+ */
+static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                         long width, long height,
+                                         long lumStride, long chromStride, long dstStride)
+{
+    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
+}
+
+/**
+ * Wrapper around yuvPlanartoyuy2 with vertLumPerChroma = 1 (one chroma line per luma line). Width should be a multiple of 16.
+ */
+static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+                                         long width, long height,
+                                         long lumStride, long chromStride, long dstStride)
+{
+    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
+}
+
+/** Split packed YUY2 into planar Y, U and V; chroma is taken from the first line of every line pair only.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ */
+static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= width>>1;
+    for (y=0; y<height; y+=2) // two source lines per iteration
+    {
+#if HAVE_MMX
+        __asm__ volatile( // first line of the pair: luma and chroma
+        "xor                 %%"REG_a", %%"REG_a"   \n\t"
+        "pcmpeqw                 %%mm7, %%mm7       \n\t"
+        "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
+        ASMALIGN(4)
+        "1:                \n\t"
+        PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
+        "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
+        "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
+        "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
+        "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
+        "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
+        "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
+        "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
+        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
+        "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
+        "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
+
+        MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
+
+        "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
+        "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
+        "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
+        "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
+        "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
+        "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
+        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
+        "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
+        "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
+        "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
+
+        MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
+
+        "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
+        "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
+        "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
+        "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
+        "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
+        "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
+        "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
+        "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
+
+        MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
+        MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
+
+        "add                        $8, %%"REG_a"   \n\t"
+        "cmp                        %4, %%"REG_a"   \n\t"
+        " jb                        1b              \n\t"
+        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+        : "memory", "%"REG_a
+        );
+
+        ydst += lumStride;
+        src  += srcStride;
+
+        __asm__ volatile( // second line of the pair: luma only; uses mm7 as left by the asm statement above
+        "xor                 %%"REG_a", %%"REG_a"   \n\t"
+        ASMALIGN(4)
+        "1:                                         \n\t"
+        PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
+        "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
+        "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
+        "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
+        "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
+        "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
+        "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
+        "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
+        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
+        "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
+        "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
+
+        MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
+        MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
+
+        "add                        $8, %%"REG_a"   \n\t"
+        "cmp                        %4, %%"REG_a"   \n\t"
+        " jb                        1b              \n\t"
+
+        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+        : "memory", "%"REG_a
+        );
+#else
+        long i;
+        for (i=0; i<chromWidth; i++) // first line of the pair: luma and chroma
+        {
+            ydst[2*i+0]     = src[4*i+0];
+            udst[i]     = src[4*i+1];
+            ydst[2*i+1]     = src[4*i+2];
+            vdst[i]     = src[4*i+3];
+        }
+        ydst += lumStride;
+        src  += srcStride;
+
+        for (i=0; i<chromWidth; i++) // second line of the pair: luma only, its chroma bytes are skipped
+        {
+            ydst[2*i+0]     = src[4*i+0];
+            ydst[2*i+1]     = src[4*i+2];
+        }
+#endif
+        udst += chromStride;
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+#if HAVE_MMX
+__asm__ volatile(   EMMS"       \n\t"
+                SFENCE"     \n\t"
+                :::"memory");
+#endif
+}
+
+static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
+                                      uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                      long width, long height, long lumStride, long chromStride)
+{
+    /* Y Plane: copied as one contiguous block of width*height bytes; lumStride and chromStride are not used */
+    memcpy(ydst, ysrc, width*height);
+
+    /* XXX: implement upscaling for U,V -- usrc/vsrc are not read and udst/vdst are left untouched */
+}
+
+/**
+ * Upscale one plane to 2*srcWidth x 2*srcHeight samples, blending neighbouring samples with 3:1 weights;
+ * the first and the last output line are interpolated horizontally only.
+ */
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+{
+    long x,y;
+    dst[0]= src[0];
+
+    // first line
+    for (x=0; x<srcWidth-1; x++){
+        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
+        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+    }
+    dst[2*srcWidth-1]= src[srcWidth-1];
+    dst+= dstStride;
+
+    for (y=1; y<srcHeight; y++){
+#if HAVE_MMX2 || HAVE_AMD3DNOW
+        const long mmxSize= srcWidth&~15;
+        if (mmxSize) __asm__ volatile( // the asm loop always runs at least once, so skip it for rows narrower than 16
+        "mov           %4, %%"REG_a"            \n\t"
+        "1:                                     \n\t"
+        "movq         (%0, %%"REG_a"), %%mm0    \n\t"
+        "movq         (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
+        "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
+        "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
+        "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
+        PAVGB"                  %%mm0, %%mm5    \n\t"
+        PAVGB"                  %%mm0, %%mm3    \n\t"
+        PAVGB"                  %%mm0, %%mm5    \n\t"
+        PAVGB"                  %%mm0, %%mm3    \n\t"
+        PAVGB"                  %%mm1, %%mm4    \n\t"
+        PAVGB"                  %%mm1, %%mm2    \n\t"
+        PAVGB"                  %%mm1, %%mm4    \n\t"
+        PAVGB"                  %%mm1, %%mm2    \n\t"
+        "movq                   %%mm5, %%mm7    \n\t"
+        "movq                   %%mm4, %%mm6    \n\t"
+        "punpcklbw              %%mm3, %%mm5    \n\t"
+        "punpckhbw              %%mm3, %%mm7    \n\t"
+        "punpcklbw              %%mm2, %%mm4    \n\t"
+        "punpckhbw              %%mm2, %%mm6    \n\t"
+#if 1
+        MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
+        MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
+        MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
+        MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
+#else
+        "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
+        "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
+        "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
+        "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
+#endif
+        "add                       $8, %%"REG_a"            \n\t"
+        " js                       1b                       \n\t"
+        :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
+           "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
+           "g" (-mmxSize)
+        : "memory", "%"REG_a // both dst rows are written through register operands
+        );
+#else
+        const long mmxSize=1;
+#endif
+        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
+        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
+
+        for (x=(mmxSize ? mmxSize-1 : 0); x<srcWidth-1; x++){ // never start at -1 when the asm part was skipped
+            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
+            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
+            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
+            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
+        }
+        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
+        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
+
+        dst+=dstStride*2;
+        src+=srcStride;
+    }
+
+    // last line
+#if 1
+    dst[0]= src[0];
+
+    for (x=0; x<srcWidth-1; x++){
+        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
+        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+    }
+    dst[2*srcWidth-1]= src[srcWidth-1];
+#else
+    for (x=0; x<srcWidth; x++){
+        dst[2*x+0]=
+        dst[2*x+1]= src[x];
+    }
+#endif
+#if HAVE_MMX
+__asm__ volatile(   EMMS"       \n\t"
+                SFENCE"     \n\t"
+                :::"memory");
+#endif
+}
+
+/**
+ * Converts packed UYVY (bytes U Y V Y per pixel pair) to planar Y, U and V,
+ * keeping chroma for every second row only.
+ *
+ * Rows are consumed in pairs: the first row of a pair writes Y, U and V, the
+ * second row writes Y only, so its chroma bytes are dropped.
+ *
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * The multiple-of-16 requirement comes from the MMX loops, which handle
+ * 8 chroma samples (16 pixels) per iteration and test the end condition only
+ * after an iteration; the C fallback steps one chroma sample at a time.
+ * Chrominance data is only taken from every second line, others are ignored.
+ * FIXME: Write HQ version.
+ *
+ * @param src         packed input, advanced by srcStride per row
+ * @param ydst        luma output, advanced by lumStride per row
+ * @param udst        U output, advanced by chromStride per row pair
+ * @param vdst        V output, advanced by chromStride per row pair
+ * @param width       row width in pixels; width>>1 chroma samples per row
+ * @param height      number of input rows
+ * @param lumStride   byte step between rows of ydst
+ * @param chromStride byte step between rows of udst / vdst
+ * @param srcStride   byte step between rows of src
+ */
+static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= width>>1; // U (and V) samples per row
+    for (y=0; y<height; y+=2)
+    {
+#if HAVE_MMX
+        // First row of the pair: split into Y, U and V.
+        // REG_a counts chroma samples; src is indexed by 4*REG_a, ydst by 2*REG_a.
+        __asm__ volatile(
+        "xor                 %%"REG_a", %%"REG_a"   \n\t"
+        "pcmpeqw             %%mm7, %%mm7   \n\t"
+        "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
+        ASMALIGN(4)
+        "1:                                 \n\t"
+        PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
+        "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
+        "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
+        "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
+        "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
+        "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
+        "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
+        "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
+        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
+        "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
+        "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
+
+        MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
+
+        "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
+        "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
+        "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
+        "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
+        "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
+        "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
+        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
+        "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
+        "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
+        "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
+
+        MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
+
+        "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
+        "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
+        "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
+        "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
+        "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
+        "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
+        "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
+        "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
+
+        MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
+        MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
+
+        "add                    $8, %%"REG_a"   \n\t"
+        "cmp                    %4, %%"REG_a"   \n\t"
+        " jb                    1b          \n\t"
+        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+        : "memory", "%"REG_a
+        );
+
+        ydst += lumStride;
+        src  += srcStride;
+
+        // Second row of the pair: luma only. The udst/vdst operands are
+        // passed but not referenced by this asm block.
+        __asm__ volatile(
+        "xor                 %%"REG_a", %%"REG_a"   \n\t"
+        ASMALIGN(4)
+        "1:                                 \n\t"
+        PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
+        "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
+        "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
+        "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(8)
+        "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // UYVY UYVY(12)
+        "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
+        "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
+        "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
+        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
+        "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
+        "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
+
+        MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
+        MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
+
+        "add                    $8, %%"REG_a"   \n\t"
+        "cmp                    %4, %%"REG_a"   \n\t"
+        " jb                    1b          \n\t"
+
+        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
+        : "memory", "%"REG_a
+        );
+#else
+        long i;
+        // First row of the pair: bytes 0..3 of each group are U, Y, V, Y.
+        for (i=0; i<chromWidth; i++)
+        {
+            udst[i]     = src[4*i+0];
+            ydst[2*i+0] = src[4*i+1];
+            vdst[i]     = src[4*i+2];
+            ydst[2*i+1] = src[4*i+3];
+        }
+        ydst += lumStride;
+        src  += srcStride;
+
+        // Second row of the pair: only the two Y bytes of each group are kept.
+        for (i=0; i<chromWidth; i++)
+        {
+            ydst[2*i+0] = src[4*i+1];
+            ydst[2*i+1] = src[4*i+3];
+        }
+#endif
+        // Step past the second row; chroma advances once per row pair.
+        udst += chromStride;
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+#if HAVE_MMX
+__asm__ volatile(   EMMS"       \n\t"
+                SFENCE"     \n\t"
+                :::"memory");
+#endif
+}
+
+/**
+ * Converts packed 24-bit pixels to planar Y, U and V with one chroma sample
+ * per 2x2 pixel block. The C code reads each pixel's bytes as B, G, R.
+ *
+ * Height should be a multiple of 2 and width should be a multiple of 2.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line,
+ * others are ignored in the C version.
+ * FIXME: Write HQ version.
+ *
+ * With HAVE_MMX every row pair except the last one is converted by the asm
+ * below, which derives chroma from both rows of the pair; the remaining rows
+ * (all rows without MMX) go through the C loop, which takes U and V from the
+ * first pixel of each horizontal pair on the first row only.
+ *
+ * NOTE(review): the MMX luma loop stores 8 Y bytes per iteration and the
+ * chroma loop 4 U and 4 V bytes, with the end test after the store, so rows
+ * handled by the asm appear to need width to be a multiple of 8 -- confirm
+ * against callers, the text above only promises a multiple of 2.
+ * NOTE(review): the last loads of each iteration (movd at offset 21, movq at
+ * offset 18) reach past the 24 bytes that iteration converts -- confirm that
+ * source rows are padded accordingly.
+ *
+ * @param src         packed input, 3 bytes per pixel, srcStride bytes per row
+ * @param ydst        luma output, lumStride bytes per row
+ * @param udst        U output, chromStride bytes per row pair
+ * @param vdst        V output, chromStride bytes per row pair
+ * @param width       row width in pixels; width>>1 chroma samples per row
+ * @param height      number of input rows
+ * @param lumStride   byte step between rows of ydst
+ * @param chromStride byte step between rows of udst / vdst
+ * @param srcStride   byte step between rows of src
+ */
+static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                                       long width, long height,
+                                       long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= width>>1;
+#if HAVE_MMX
+    // Stops two rows early; the C loop further down resumes at this y.
+    for (y=0; y<height-2; y+=2)
+    {
+        long i;
+        // Luma pass over the two rows of this pair.
+        // NOTE(review): this asm stores through %1 but lists no "memory"
+        // clobber -- confirm this is intended.
+        for (i=0; i<2; i++)
+        {
+            __asm__ volatile(
+            "mov                        %2, %%"REG_a"   \n\t" // REG_a = -width, counts up towards 0
+            "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
+            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
+            "pxor                    %%mm7, %%mm7       \n\t"
+            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t" // REG_d = 3*REG_a, byte offset into src
+            ASMALIGN(4)
+            "1:                                         \n\t"
+            PREFETCH"    64(%0, %%"REG_d")              \n\t"
+            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
+            "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
+            "punpcklbw               %%mm7, %%mm0       \n\t"
+            "punpcklbw               %%mm7, %%mm1       \n\t"
+            "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
+            "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm3       \n\t"
+            "pmaddwd                 %%mm6, %%mm0       \n\t"
+            "pmaddwd                 %%mm6, %%mm1       \n\t"
+            "pmaddwd                 %%mm6, %%mm2       \n\t"
+            "pmaddwd                 %%mm6, %%mm3       \n\t"
+#ifndef FAST_BGR2YV12
+            "psrad                      $8, %%mm0       \n\t"
+            "psrad                      $8, %%mm1       \n\t"
+            "psrad                      $8, %%mm2       \n\t"
+            "psrad                      $8, %%mm3       \n\t"
+#endif
+            "packssdw                %%mm1, %%mm0       \n\t"
+            "packssdw                %%mm3, %%mm2       \n\t"
+            "pmaddwd                 %%mm5, %%mm0       \n\t"
+            "pmaddwd                 %%mm5, %%mm2       \n\t"
+            "packssdw                %%mm2, %%mm0       \n\t"
+            "psraw                      $7, %%mm0       \n\t"
+
+            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
+            "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
+            "punpcklbw               %%mm7, %%mm4       \n\t"
+            "punpcklbw               %%mm7, %%mm1       \n\t"
+            "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
+            "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
+            "punpcklbw               %%mm7, %%mm2       \n\t"
+            "punpcklbw               %%mm7, %%mm3       \n\t"
+            "pmaddwd                 %%mm6, %%mm4       \n\t"
+            "pmaddwd                 %%mm6, %%mm1       \n\t"
+            "pmaddwd                 %%mm6, %%mm2       \n\t"
+            "pmaddwd                 %%mm6, %%mm3       \n\t"
+#ifndef FAST_BGR2YV12
+            "psrad                      $8, %%mm4       \n\t"
+            "psrad                      $8, %%mm1       \n\t"
+            "psrad                      $8, %%mm2       \n\t"
+            "psrad                      $8, %%mm3       \n\t"
+#endif
+            "packssdw                %%mm1, %%mm4       \n\t"
+            "packssdw                %%mm3, %%mm2       \n\t"
+            "pmaddwd                 %%mm5, %%mm4       \n\t"
+            "pmaddwd                 %%mm5, %%mm2       \n\t"
+            "add                       $24, %%"REG_d"   \n\t"
+            "packssdw                %%mm2, %%mm4       \n\t"
+            "psraw                      $7, %%mm4       \n\t"
+
+            "packuswb                %%mm4, %%mm0       \n\t"
+            "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
+
+            MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
+            "add                        $8,      %%"REG_a"  \n\t"
+            " js                        1b                  \n\t"
+            : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
+            : "%"REG_a, "%"REG_d
+            );
+            ydst += lumStride;
+            src  += srcStride;
+        }
+        // Rewind to the first row of the pair for the chroma pass.
+        src -= srcStride*2;
+        // Chroma pass: %0 is the first row, %1 the second row of the pair.
+        __asm__ volatile(
+        "mov                        %4, %%"REG_a"   \n\t" // REG_a = -chromWidth, counts up towards 0
+        "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
+        "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
+        "pxor                    %%mm7, %%mm7       \n\t"
+        "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
+        "add                 %%"REG_d", %%"REG_d"   \n\t" // REG_d = 6*REG_a, byte offset into src
+        ASMALIGN(4)
+        "1:                                         \n\t"
+        PREFETCH"    64(%0, %%"REG_d")              \n\t"
+        PREFETCH"    64(%1, %%"REG_d")              \n\t"
+#if HAVE_MMX2 || HAVE_AMD3DNOW
+        // PAVGB the two rows, then PAVGB with a copy shifted by one pixel (24 bits).
+        "movq          (%0, %%"REG_d"), %%mm0       \n\t"
+        "movq          (%1, %%"REG_d"), %%mm1       \n\t"
+        "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
+        "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
+        PAVGB"                   %%mm1, %%mm0       \n\t"
+        PAVGB"                   %%mm3, %%mm2       \n\t"
+        "movq                    %%mm0, %%mm1       \n\t"
+        "movq                    %%mm2, %%mm3       \n\t"
+        "psrlq                     $24, %%mm0       \n\t"
+        "psrlq                     $24, %%mm2       \n\t"
+        PAVGB"                   %%mm1, %%mm0       \n\t"
+        PAVGB"                   %%mm3, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm0       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+#else
+        // Sum the four pixels of each 2x2 block as words, then shift right by 2.
+        "movd          (%0, %%"REG_d"), %%mm0       \n\t"
+        "movd          (%1, %%"REG_d"), %%mm1       \n\t"
+        "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
+        "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
+        "punpcklbw               %%mm7, %%mm0       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "paddw                   %%mm1, %%mm0       \n\t"
+        "paddw                   %%mm3, %%mm2       \n\t"
+        "paddw                   %%mm2, %%mm0       \n\t"
+        "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
+        "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
+        "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
+        "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
+        "punpcklbw               %%mm7, %%mm4       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "paddw                   %%mm1, %%mm4       \n\t"
+        "paddw                   %%mm3, %%mm2       \n\t"
+        "paddw                   %%mm4, %%mm2       \n\t"
+        "psrlw                      $2, %%mm0       \n\t"
+        "psrlw                      $2, %%mm2       \n\t"
+#endif
+        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
+        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
+
+        "pmaddwd                 %%mm0, %%mm1       \n\t"
+        "pmaddwd                 %%mm2, %%mm3       \n\t"
+        "pmaddwd                 %%mm6, %%mm0       \n\t"
+        "pmaddwd                 %%mm6, %%mm2       \n\t"
+#ifndef FAST_BGR2YV12
+        "psrad                      $8, %%mm0       \n\t"
+        "psrad                      $8, %%mm1       \n\t"
+        "psrad                      $8, %%mm2       \n\t"
+        "psrad                      $8, %%mm3       \n\t"
+#endif
+        "packssdw                %%mm2, %%mm0       \n\t"
+        "packssdw                %%mm3, %%mm1       \n\t"
+        "pmaddwd                 %%mm5, %%mm0       \n\t"
+        "pmaddwd                 %%mm5, %%mm1       \n\t"
+        "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
+        "psraw                      $7, %%mm0       \n\t"
+
+#if HAVE_MMX2 || HAVE_AMD3DNOW
+        "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
+        "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
+        "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
+        "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
+        PAVGB"                   %%mm1, %%mm4       \n\t"
+        PAVGB"                   %%mm3, %%mm2       \n\t"
+        "movq                    %%mm4, %%mm1       \n\t"
+        "movq                    %%mm2, %%mm3       \n\t"
+        "psrlq                     $24, %%mm4       \n\t"
+        "psrlq                     $24, %%mm2       \n\t"
+        PAVGB"                   %%mm1, %%mm4       \n\t"
+        PAVGB"                   %%mm3, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm4       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+#else
+        "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
+        "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
+        "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
+        "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
+        "punpcklbw               %%mm7, %%mm4       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "paddw                   %%mm1, %%mm4       \n\t"
+        "paddw                   %%mm3, %%mm2       \n\t"
+        "paddw                   %%mm2, %%mm4       \n\t"
+        "movd        18(%0, %%"REG_d"), %%mm5       \n\t" // mm5 is borrowed as scratch here
+        "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
+        "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
+        "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
+        "punpcklbw               %%mm7, %%mm5       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "paddw                   %%mm1, %%mm5       \n\t"
+        "paddw                   %%mm3, %%mm2       \n\t"
+        "paddw                   %%mm5, %%mm2       \n\t"
+        "movq       "MANGLE(ff_w1111)", %%mm5       \n\t" // restore mm5 = ff_w1111 after the scratch use
+        "psrlw                      $2, %%mm4       \n\t"
+        "psrlw                      $2, %%mm2       \n\t"
+#endif
+        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
+        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
+
+        "pmaddwd                 %%mm4, %%mm1       \n\t"
+        "pmaddwd                 %%mm2, %%mm3       \n\t"
+        "pmaddwd                 %%mm6, %%mm4       \n\t"
+        "pmaddwd                 %%mm6, %%mm2       \n\t"
+#ifndef FAST_BGR2YV12
+        "psrad                      $8, %%mm4       \n\t"
+        "psrad                      $8, %%mm1       \n\t"
+        "psrad                      $8, %%mm2       \n\t"
+        "psrad                      $8, %%mm3       \n\t"
+#endif
+        "packssdw                %%mm2, %%mm4       \n\t"
+        "packssdw                %%mm3, %%mm1       \n\t"
+        "pmaddwd                 %%mm5, %%mm4       \n\t"
+        "pmaddwd                 %%mm5, %%mm1       \n\t"
+        "add                       $24, %%"REG_d"   \n\t"
+        "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
+        "psraw                      $7, %%mm4       \n\t"
+
+        "movq                    %%mm0, %%mm1           \n\t"
+        "punpckldq               %%mm4, %%mm0           \n\t"
+        "punpckhdq               %%mm4, %%mm1           \n\t"
+        "packsswb                %%mm1, %%mm0           \n\t"
+        "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
+        "movd                    %%mm0, (%2, %%"REG_a") \n\t" // 4 U bytes
+        "punpckhdq               %%mm0, %%mm0           \n\t"
+        "movd                    %%mm0, (%3, %%"REG_a") \n\t" // 4 V bytes
+        "add                        $4, %%"REG_a"       \n\t"
+        " js                        1b                  \n\t"
+        : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+        : "%"REG_a, "%"REG_d
+        );
+
+        udst += chromStride;
+        vdst += chromStride;
+        src  += srcStride*2;
+    }
+
+    __asm__ volatile(   EMMS"       \n\t"
+                    SFENCE"     \n\t"
+                    :::"memory");
+#else
+    y=0;
+#endif
+    // C path: every row pair without MMX, otherwise the rows the asm left over.
+    for (; y<height; y+=2)
+    {
+        long i;
+        // First row of the pair: Y for both pixels, U and V from the first pixel.
+        for (i=0; i<chromWidth; i++)
+        {
+            unsigned int b = src[6*i+0];
+            unsigned int g = src[6*i+1];
+            unsigned int r = src[6*i+2];
+
+            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
+            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
+
+            udst[i]     = U;
+            vdst[i]     = V;
+            ydst[2*i]   = Y;
+
+            b = src[6*i+3];
+            g = src[6*i+4];
+            r = src[6*i+5];
+
+            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            ydst[2*i+1]     = Y;
+        }
+        ydst += lumStride;
+        src  += srcStride;
+
+        // Second row of the pair: luma only, its chroma is not used.
+        for (i=0; i<chromWidth; i++)
+        {
+            unsigned int b = src[6*i+0];
+            unsigned int g = src[6*i+1];
+            unsigned int r = src[6*i+2];
+
+            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+
+            ydst[2*i]     = Y;
+
+            b = src[6*i+3];
+            g = src[6*i+4];
+            r = src[6*i+5];
+
+            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            ydst[2*i+1]     = Y;
+        }
+        udst += chromStride;
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+}
+
+/**
+ * Interleaves two byte planes into one: for every row,
+ * dest[2*i] = src1[i] and dest[2*i+1] = src2[i] for 0 <= i < width.
+ *
+ * With HAVE_MMX the bulk of each row is handled 16 input bytes at a time in
+ * assembly and the remaining width&15 bytes in C. The assembly is entered
+ * only for rows of at least 16 bytes, and the SSE2 variant only when all
+ * three row pointers are 16-byte aligned; otherwise the MMX variant runs.
+ *
+ * @param src1       first input plane, supplies the even output bytes
+ * @param src2       second input plane, supplies the odd output bytes
+ * @param dest       output plane, 2*width bytes are written per row
+ * @param width      number of bytes taken from each input per row
+ * @param height     number of rows
+ * @param src1Stride byte step between rows of src1
+ * @param src2Stride byte step between rows of src2
+ * @param dstStride  byte step between rows of dest
+ */
+static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
+                             long width, long height, long src1Stride,
+                             long src2Stride, long dstStride){
+    long h;
+
+    for (h=0; h < height; h++)
+    {
+        long w;
+
+#if HAVE_MMX
+        /* Both asm loops run their body before the first end test and compare
+         * the counter against width-15 as an unsigned value, so a row shorter
+         * than 16 bytes must skip them and be copied by the C loop below. */
+        if (width >= 16)
+        {
+#if HAVE_SSE2
+            /* movdqa and movntdq need 16-byte aligned addresses; the strides
+             * may change the alignment from row to row, so test every row. */
+            if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest)) & 15))
+            {
+                __asm__(
+                "xor              %%"REG_a", %%"REG_a"  \n\t"
+                "1:                                     \n\t"
+                PREFETCH" 64(%1, %%"REG_a")             \n\t"
+                PREFETCH" 64(%2, %%"REG_a")             \n\t"
+                "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
+                "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
+                "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
+                "punpcklbw           %%xmm2, %%xmm0     \n\t"
+                "punpckhbw           %%xmm2, %%xmm1     \n\t"
+                "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
+                "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
+                "add                    $16, %%"REG_a"  \n\t"
+                "cmp                     %3, %%"REG_a"  \n\t"
+                " jb                     1b             \n\t"
+                ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
+                : "memory", "%"REG_a
+                );
+            }
+            else
+#endif
+            {
+                __asm__(
+                "xor %%"REG_a", %%"REG_a"               \n\t"
+                "1:                                     \n\t"
+                PREFETCH" 64(%1, %%"REG_a")             \n\t"
+                PREFETCH" 64(%2, %%"REG_a")             \n\t"
+                "movq       (%1, %%"REG_a"), %%mm0      \n\t"
+                "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
+                "movq                 %%mm0, %%mm1      \n\t"
+                "movq                 %%mm2, %%mm3      \n\t"
+                "movq       (%2, %%"REG_a"), %%mm4      \n\t"
+                "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
+                "punpcklbw            %%mm4, %%mm0      \n\t"
+                "punpckhbw            %%mm4, %%mm1      \n\t"
+                "punpcklbw            %%mm5, %%mm2      \n\t"
+                "punpckhbw            %%mm5, %%mm3      \n\t"
+                MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
+                MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
+                MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
+                MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
+                "add                    $16, %%"REG_a"  \n\t"
+                "cmp                     %3, %%"REG_a"  \n\t"
+                " jb                     1b             \n\t"
+                ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
+                : "memory", "%"REG_a
+                );
+            }
+        }
+        /* Bytes the asm did not cover: the last width&15 of a long row, or
+         * the whole row when it is shorter than 16 bytes. */
+        for (w= (width&(~15)); w < width; w++)
+        {
+            dest[2*w+0] = src1[w];
+            dest[2*w+1] = src2[w];
+        }
+#else
+        for (w=0; w < width; w++)
+        {
+            dest[2*w+0] = src1[w];
+            dest[2*w+1] = src2[w];
+        }
+#endif
+        dest += dstStride;
+        src1 += src1Stride;
+        src2 += src2Stride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
+/**
+ * Doubles two byte planes in both directions by sample repetition.
+ *
+ * For each plane, output row y (0 <= y < height/2) is built from source row
+ * y>>1, and every source byte s[x] (0 <= x < width/2) is stored twice, at
+ * d[2*x] and d[2*x+1].
+ *
+ * The MMX loops address memory as base + index in registers. Prefixing a
+ * displacement to an "m" operand (e.g. "8%1") only assembles to the intended
+ * address when the compiler prints that operand without a displacement of
+ * its own, so that form is avoided here.
+ *
+ * @param src1       first source plane
+ * @param src2       second source plane
+ * @param dst1       destination for the doubled src1
+ * @param dst2       destination for the doubled src2
+ * @param width      twice the number of source bytes consumed per row
+ * @param height     twice the number of destination rows written
+ * @param srcStride1 byte step between rows of src1
+ * @param srcStride2 byte step between rows of src2
+ * @param dstStride1 byte step between rows of dst1
+ * @param dstStride2 byte step between rows of dst2
+ */
+static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
+                                       uint8_t *dst1, uint8_t *dst2,
+                                       long width, long height,
+                                       long srcStride1, long srcStride2,
+                                       long dstStride1, long dstStride2)
+{
+    long y,x,w,h;
+    w=width/2; h=height/2;
+#if HAVE_MMX
+    __asm__ volatile(
+    PREFETCH" %0    \n\t"
+    PREFETCH" %1    \n\t"
+    ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
+#endif
+    /* First plane: src1 -> dst1. */
+    for (y=0;y<h;y++){
+    const uint8_t* s1=src1+srcStride1*(y>>1);
+    uint8_t* d=dst1+dstStride1*y;
+    x=0;
+#if HAVE_MMX
+    /* 32 source bytes in, 64 destination bytes out per iteration.
+     * %0 = d, %1 = s1, %2 = x; the destination is indexed by 2*x. */
+    for (;x<w-31;x+=32)
+    {
+        __asm__ volatile(
+        PREFETCH"   32(%1, %2)          \n\t"
+        "movq         (%1, %2), %%mm0   \n\t"
+        "movq        8(%1, %2), %%mm2   \n\t"
+        "movq       16(%1, %2), %%mm4   \n\t"
+        "movq       24(%1, %2), %%mm6   \n\t"
+        "movq            %%mm0, %%mm1   \n\t"
+        "movq            %%mm2, %%mm3   \n\t"
+        "movq            %%mm4, %%mm5   \n\t"
+        "movq            %%mm6, %%mm7   \n\t"
+        "punpcklbw       %%mm0, %%mm0   \n\t"
+        "punpckhbw       %%mm1, %%mm1   \n\t"
+        "punpcklbw       %%mm2, %%mm2   \n\t"
+        "punpckhbw       %%mm3, %%mm3   \n\t"
+        "punpcklbw       %%mm4, %%mm4   \n\t"
+        "punpckhbw       %%mm5, %%mm5   \n\t"
+        "punpcklbw       %%mm6, %%mm6   \n\t"
+        "punpckhbw       %%mm7, %%mm7   \n\t"
+        MOVNTQ"          %%mm0,   (%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm1,  8(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm2, 16(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm3, 24(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm4, 32(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm5, 40(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm6, 48(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm7, 56(%0, %2, 2)"
+        :: "r"(d), "r"(s1), "r"(x)
+        :"memory");
+    }
+#endif
+    /* Remaining bytes of the row (the whole row without MMX). */
+    for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
+    }
+    /* Second plane: src2 -> dst2, same procedure. */
+    for (y=0;y<h;y++){
+    const uint8_t* s2=src2+srcStride2*(y>>1);
+    uint8_t* d=dst2+dstStride2*y;
+    x=0;
+#if HAVE_MMX
+    for (;x<w-31;x+=32)
+    {
+        __asm__ volatile(
+        PREFETCH"   32(%1, %2)          \n\t"
+        "movq         (%1, %2), %%mm0   \n\t"
+        "movq        8(%1, %2), %%mm2   \n\t"
+        "movq       16(%1, %2), %%mm4   \n\t"
+        "movq       24(%1, %2), %%mm6   \n\t"
+        "movq            %%mm0, %%mm1   \n\t"
+        "movq            %%mm2, %%mm3   \n\t"
+        "movq            %%mm4, %%mm5   \n\t"
+        "movq            %%mm6, %%mm7   \n\t"
+        "punpcklbw       %%mm0, %%mm0   \n\t"
+        "punpckhbw       %%mm1, %%mm1   \n\t"
+        "punpcklbw       %%mm2, %%mm2   \n\t"
+        "punpckhbw       %%mm3, %%mm3   \n\t"
+        "punpcklbw       %%mm4, %%mm4   \n\t"
+        "punpckhbw       %%mm5, %%mm5   \n\t"
+        "punpcklbw       %%mm6, %%mm6   \n\t"
+        "punpckhbw       %%mm7, %%mm7   \n\t"
+        MOVNTQ"          %%mm0,   (%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm1,  8(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm2, 16(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm3, 24(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm4, 32(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm5, 40(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm6, 48(%0, %2, 2)   \n\t"
+        MOVNTQ"          %%mm7, 56(%0, %2, 2)"
+        :: "r"(d), "r"(s2), "r"(x)
+        :"memory");
+    }
+#endif
+    for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
+/**
+ * Packs one luma plane and two chroma planes into interleaved
+ * Y U Y V Y U Y V groups, reusing each chroma sample for four consecutive
+ * luma bytes and each chroma row for four consecutive output rows (y>>2).
+ *
+ * Per row the loops run x over [0, width/2); each x consumes yp[4*x..4*x+3],
+ * up[x] and vp[x] and produces 8 output bytes at d[8*x].
+ * NOTE(review): that reads 2*width luma bytes per row -- confirm what unit
+ * callers pass as width.
+ *
+ * @param src1       luma plane
+ * @param src2       plane whose bytes land at offsets 1 and 5 of each group
+ * @param src3       plane whose bytes land at offsets 3 and 7 of each group
+ * @param dst        packed output
+ * @param width      see note above; width/2 chroma samples are used per row
+ * @param height     number of output rows
+ * @param srcStride1 byte step between rows of src1
+ * @param srcStride2 byte step between rows of src2
+ * @param srcStride3 byte step between rows of src3
+ * @param dstStride  byte step between rows of dst
+ */
+static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+                                        uint8_t *dst,
+                                        long width, long height,
+                                        long srcStride1, long srcStride2,
+                                        long srcStride3, long dstStride)
+{
+    long y,x,w,h;
+    w=width/2; h=height;
+    for (y=0;y<h;y++){
+    const uint8_t* yp=src1+srcStride1*y;
+    const uint8_t* up=src2+srcStride2*(y>>2);
+    const uint8_t* vp=src3+srcStride3*(y>>2);
+    uint8_t* d=dst+dstStride*y;
+    x=0;
+#if HAVE_MMX
+    // 8 chroma samples, 32 luma bytes and 64 output bytes per iteration.
+    // %0 = x (index register), %1 = yp, %2 = up, %3 = vp, %4 = d.
+    for (;x<w-7;x+=8)
+    {
+        __asm__ volatile(
+        PREFETCH"   32(%1, %0)          \n\t"
+        PREFETCH"   32(%2, %0)          \n\t"
+        PREFETCH"   32(%3, %0)          \n\t"
+        "movq      (%1, %0, 4), %%mm0   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
+        "movq         (%2, %0), %%mm1   \n\t" /* U0U1U2U3U4U5U6U7 */
+        "movq         (%3, %0), %%mm2   \n\t" /* V0V1V2V3V4V5V6V7 */
+        "movq            %%mm0, %%mm3   \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
+        "movq            %%mm1, %%mm4   \n\t" /* U0U1U2U3U4U5U6U7 */
+        "movq            %%mm2, %%mm5   \n\t" /* V0V1V2V3V4V5V6V7 */
+        "punpcklbw       %%mm1, %%mm1   \n\t" /* U0U0 U1U1 U2U2 U3U3 */
+        "punpcklbw       %%mm2, %%mm2   \n\t" /* V0V0 V1V1 V2V2 V3V3 */
+        "punpckhbw       %%mm4, %%mm4   \n\t" /* U4U4 U5U5 U6U6 U7U7 */
+        "punpckhbw       %%mm5, %%mm5   \n\t" /* V4V4 V5V5 V6V6 V7V7 */
+
+        "movq            %%mm1, %%mm6   \n\t"
+        "punpcklbw       %%mm2, %%mm1   \n\t" /* U0V0 U0V0 U1V1 U1V1*/
+        "punpcklbw       %%mm1, %%mm0   \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
+        "punpckhbw       %%mm1, %%mm3   \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
+        MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t"
+        MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t"
+
+        "punpckhbw       %%mm2, %%mm6   \n\t" /* U2V2 U2V2 U3V3 U3V3*/
+        "movq     8(%1, %0, 4), %%mm0   \n\t"
+        "movq            %%mm0, %%mm3   \n\t"
+        "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U2 Y V2 Y U2 Y V2*/
+        "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U3 Y V3 Y U3 Y V3*/
+        MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t"
+        MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t"
+
+        "movq            %%mm4, %%mm6   \n\t"
+        "movq    16(%1, %0, 4), %%mm0   \n\t"
+        "movq            %%mm0, %%mm3   \n\t"
+        "punpcklbw       %%mm5, %%mm4   \n\t"
+        "punpcklbw       %%mm4, %%mm0   \n\t" /* Y U4 Y V4 Y U4 Y V4*/
+        "punpckhbw       %%mm4, %%mm3   \n\t" /* Y U5 Y V5 Y U5 Y V5*/
+        MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t"
+        MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t"
+
+        "punpckhbw       %%mm5, %%mm6   \n\t"
+        "movq    24(%1, %0, 4), %%mm0   \n\t"
+        "movq            %%mm0, %%mm3   \n\t"
+        "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U6 Y V6 Y U6 Y V6*/
+        "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U7 Y V7 Y U7 Y V7*/
+        MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t"
+        MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t"
+
+        // x is declared read-write ("+r") although no instruction above writes it.
+        : "+r" (x)
+        : "r"(yp), "r" (up), "r"(vp), "r"(d)
+        :"memory");
+    }
+#endif
+    // Remaining samples of the row (the whole row without MMX).
+    for (; x<w; x++)
+    {
+        const long x2 = x<<2; // index of the first of the four luma bytes for this x
+        d[8*x+0] = yp[x2];
+        d[8*x+1] = up[x];
+        d[8*x+2] = yp[x2+1];
+        d[8*x+3] = vp[x];
+        d[8*x+4] = yp[x2+2];
+        d[8*x+5] = up[x];
+        d[8*x+6] = yp[x2+3];
+        d[8*x+7] = vp[x];
+    }
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
+/**
+ * Assigns this template's RENAME()d implementations to the identically named
+ * conversion function pointers, which are declared outside this template.
+ * The assignments are independent of each other; they are grouped by source
+ * format for readability only.
+ */
+static inline void RENAME(rgb2rgb_init)(void){
+    /* 15 bpp sources */
+    rgb15to16       = RENAME(rgb15to16);
+    rgb15to32       = RENAME(rgb15to32);
+    rgb15tobgr24    = RENAME(rgb15tobgr24);
+
+    /* 16 bpp sources */
+    rgb16to15       = RENAME(rgb16to15);
+    rgb16to32       = RENAME(rgb16to32);
+    rgb16tobgr24    = RENAME(rgb16tobgr24);
+
+    /* 24 bpp sources */
+    rgb24to15       = RENAME(rgb24to15);
+    rgb24to16       = RENAME(rgb24to16);
+    rgb24tobgr15    = RENAME(rgb24tobgr15);
+    rgb24tobgr16    = RENAME(rgb24tobgr16);
+    rgb24tobgr24    = RENAME(rgb24tobgr24);
+    rgb24tobgr32    = RENAME(rgb24tobgr32);
+    rgb24toyv12     = RENAME(rgb24toyv12);
+
+    /* 32 bpp sources */
+    rgb32to15       = RENAME(rgb32to15);
+    rgb32to16       = RENAME(rgb32to16);
+    rgb32tobgr15    = RENAME(rgb32tobgr15);
+    rgb32tobgr16    = RENAME(rgb32tobgr16);
+    rgb32tobgr24    = RENAME(rgb32tobgr24);
+    rgb32tobgr32    = RENAME(rgb32tobgr32);
+
+    /* planar / packed YUV */
+    yv12toyuy2      = RENAME(yv12toyuy2);
+    yv12touyvy      = RENAME(yv12touyvy);
+    yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
+    yuv422ptouyvy   = RENAME(yuv422ptouyvy);
+    yuy2toyv12      = RENAME(yuy2toyv12);
+    /* The two assignments below are disabled in this template: */
+//    uyvytoyv12      = RENAME(uyvytoyv12);
+//    yvu9toyv12      = RENAME(yvu9toyv12);
+
+    /* plane helpers */
+    planar2x        = RENAME(planar2x);
+    interleaveBytes = RENAME(interleaveBytes);
+    vu9_to_vu12     = RENAME(vu9_to_vu12);
+    yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
+}
diff --git a/libswscale/swscale-example.c b/libswscale/swscale-example.c
new file mode 100644
index 0000000000..87b9ba027d
--- /dev/null
+++ b/libswscale/swscale-example.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdarg.h>
+
+#undef HAVE_AV_CONFIG_H
+#include "libavutil/avutil.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){
+    /* Sum of squared differences between two w x h byte planes; each plane is walked with its own line stride. */
+    uint64_t total = 0;
+    int row, col;
+
+    for (row = 0; row < h; row++) {
+        const uint8_t *line1 = src1 + row*stride1;
+        const uint8_t *line2 = src2 + row*stride2;
+
+        for (col = 0; col < w; col++) {
+            const int diff = line1[col] - line2[col];
+            total += diff*diff;
+        }
+    }
+    return total;
+}
+
+// Round trip: ref -> src -> dst -> out, then print the per-plane SSD of out against ref.
+// ref & out are YV12. Returns 0 on success, -1 if an allocation or a scaler context could not be obtained.
+static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat,
+                  int srcW, int srcH, int dstW, int dstH, int flags){
+    uint8_t *src[3] = {NULL, NULL, NULL}; // buffers and contexts start out NULL so the cleanup at "end" is safe on every path
+    uint8_t *dst[3] = {NULL, NULL, NULL};
+    uint8_t *out[3] = {NULL, NULL, NULL};
+    int srcStride[3], dstStride[3];
+    int i;
+    uint64_t ssdY, ssdU, ssdV;
+    struct SwsContext *srcContext = NULL, *dstContext = NULL, *outContext = NULL;
+    int res;
+
+    res = 0;
+    for (i=0; i<3; i++){
+        // avoid stride % bpp != 0
+        if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
+            srcStride[i]= srcW*3;
+        else
+            srcStride[i]= srcW*4;
+
+        if (dstFormat==PIX_FMT_RGB24 || dstFormat==PIX_FMT_BGR24)
+            dstStride[i]= dstW*3;
+        else
+            dstStride[i]= dstW*4;
+
+        src[i]= malloc(srcStride[i]*srcH);
+        dst[i]= malloc(dstStride[i]*dstH);
+        out[i]= malloc(refStride[i]*h);
+        if (!src[i] || !dst[i] || !out[i]) {
+            perror("Malloc");
+            res = -1;
+
+            goto end;
+        }
+    }
+
+    // one scaler context per stage of the round trip
+    srcContext= sws_getContext(w, h, PIX_FMT_YUV420P, srcW, srcH, srcFormat, flags, NULL, NULL, NULL);
+    if (!srcContext) {
+        fprintf(stderr, "Failed to get %s ---> %s\n",
+                sws_format_name(PIX_FMT_YUV420P),
+                sws_format_name(srcFormat));
+        res = -1;
+
+        goto end;
+    }
+    dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL);
+    if (!dstContext) {
+        fprintf(stderr, "Failed to get %s ---> %s\n",
+                sws_format_name(srcFormat),
+                sws_format_name(dstFormat));
+        res = -1;
+
+        goto end;
+    }
+    outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUV420P, flags, NULL, NULL, NULL);
+    if (!outContext) {
+        fprintf(stderr, "Failed to get %s ---> %s\n",
+                sws_format_name(dstFormat),
+                sws_format_name(PIX_FMT_YUV420P));
+        res = -1;
+
+        goto end;
+    }
+    // run the three stages: reference -> srcFormat at srcW x srcH,
+    // then -> dstFormat at dstW x dstH, then back to the reference format and size
+
+    sws_scale(srcContext, ref, refStride, 0, h   , src, srcStride);
+    sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
+    sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride);
+
+    ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
+    ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
+    ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
+
+    if (srcFormat == PIX_FMT_GRAY8 || dstFormat==PIX_FMT_GRAY8) ssdU=ssdV=0; //FIXME check that output is really gray
+
+    ssdY/= w*h;
+    ssdU/= w*h/4;
+    ssdV/= w*h/4;
+
+    printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5"PRIu64",%5"PRIu64",%5"PRIu64"\n",
+           sws_format_name(srcFormat), srcW, srcH,
+           sws_format_name(dstFormat), dstW, dstH,
+           flags, ssdY, ssdU, ssdV);
+    fflush(stdout);
+
+    end:
+    // every pointer below is either a live object or NULL at this point
+    sws_freeContext(srcContext);
+    sws_freeContext(dstContext);
+    sws_freeContext(outContext);
+
+    for (i=0; i<3; i++){
+        free(src[i]);
+        free(dst[i]);
+        free(out[i]);
+    }
+
+    return res;
+}
+
+static void selfTest(uint8_t *src[3], int stride[3], int w, int h){ // runs doTest() on the given picture for every source/destination format pair, several output sizes and flag values
+    enum PixelFormat srcFormat, dstFormat;
+    int srcW, srcH, dstW, dstH;
+    int flags;
+
+    for (srcFormat = 0; srcFormat < PIX_FMT_NB; srcFormat++) {
+        for (dstFormat = 0; dstFormat < PIX_FMT_NB; dstFormat++) {
+            printf("%s -> %s\n",
+                   sws_format_name(srcFormat),
+                   sws_format_name(dstFormat));
+            fflush(stdout);
+
+            srcW= w; // the intermediate (src) picture keeps the reference size
+            srcH= h;
+            for (dstW=w - w/3; dstW<= 4*w/3; dstW+= w/3){ // widths from w - w/3 up to 4*w/3 in steps of w/3 (integer division)
+                for (dstH=h - h/3; dstH<= 4*h/3; dstH+= h/3){ // same stepping for the height
+                    for (flags=1; flags<33; flags*=2) { // flags takes the values 1, 2, 4, 8, 16, 32
+                        int res;
+
+                        res = doTest(src, stride, w, h, srcFormat, dstFormat,
+                                     srcW, srcH, dstW, dstH, flags);
+                        if (res < 0) { // on failure, push all three loop variables to their limits so the loops end and the next format pair starts
+                            dstW = 4 * w / 3;
+                            dstH = 4 * h / 3;
+                            flags = 33;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+#define W 96
+#define H 96
+
+int main(int argc, char **argv){ // builds a random reference picture and runs selfTest() on it
+    uint8_t *rgb_data = malloc (W*H*4); // RGB32 input that gets filled with random bytes
+    uint8_t *data = malloc (3*W*H);     // room for three W*H planes of the reference picture
+    uint8_t *rgb_src[3]= {rgb_data, NULL, NULL};
+    int rgb_stride[3]={4*W, 0, 0};
+    uint8_t *src[3]= {data, data ? data+W*H : NULL, data ? data+W*H*2 : NULL};
+    int stride[3]={W, W, W};
+    int x, y;
+    struct SwsContext *sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUV420P, 2, NULL, NULL, NULL);
+    if (!rgb_data || !data || !sws) { // never hand NULL buffers or a NULL context to the scaler
+        fprintf(stderr, "Failed to allocate the reference picture or the scaler context\n");
+    } else {
+        for (y=0; y<H; y++)
+            for (x=0; x<W*4; x++)
+                rgb_data[ x + y*4*W]= random();
+        sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
+        selfTest(src, stride, W, H);
+    }
+    sws_freeContext(sws);
+    free(data);
+    free(rgb_data);
+    return 123; // exit status left unchanged
+}
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
new file mode 100644
index 0000000000..7c335f1680
--- /dev/null
+++ b/libswscale/swscale.c
@@ -0,0 +1,3198 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * the C code (not assembly, mmx, ...) of this file can be used
+ * under the LGPL license too
+ */
+
+/*
+  supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
+  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
+  {BGR,RGB}{1,4,8,15,16} support dithering
+
+  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
+  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
+  x -> x
+  YUV9 -> YV12
+  YUV9/YV12 -> Y800
+  Y800 -> YUV9/YV12
+  BGR24 -> BGR32 & RGB24 -> RGB32
+  BGR32 -> BGR24 & RGB32 -> RGB24
+  BGR15 -> BGR16
+*/
+
+/*
+tested special converters (most are tested actually, but I did not write it down ...)
+ YV12 -> BGR16
+ YV12 -> YV12
+ BGR15 -> BGR16
+ BGR16 -> BGR16
+ YVU9 -> YV12
+
+untested special converters
+  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
+  YV12/I420 -> YV12/I420
+  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
+  BGR24 -> BGR32 & RGB24 -> RGB32
+  BGR32 -> BGR24 & RGB32 -> RGB24
+  BGR24 -> YV12
+*/
+
+#define _SVID_SOURCE //needed for MAP_ANONYMOUS
+#include <inttypes.h>
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "config.h"
+#include <assert.h>
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#endif
+#include "swscale.h"
+#include "swscale_internal.h"
+#include "rgb2rgb.h"
+#include "libavutil/x86_cpu.h"
+#include "libavutil/bswap.h"
+
+unsigned swscale_version(void) // reports the LIBSWSCALE_VERSION_INT value this file was compiled with
+{
+    return LIBSWSCALE_VERSION_INT;
+}
+
+#undef MOVNTQ
+#undef PAVGB
+
+//#undef HAVE_MMX2
+//#define HAVE_AMD3DNOW
+//#undef HAVE_MMX
+//#undef ARCH_X86
+//#define WORDS_BIGENDIAN
+#define DITHER1XBPP
+
+#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
+
+#define RET 0xC3 //near return opcode for x86
+
+#ifdef M_PI
+#define PI M_PI
+#else
+#define PI 3.14159265358979323846
+#endif
+
+#define isSupportedIn(x)    (       /* nonzero when x equals one of the pixel formats listed here; x is evaluated once per comparison */ \
+           (x)==PIX_FMT_YUV420P     \
+        || (x)==PIX_FMT_YUVA420P    \
+        || (x)==PIX_FMT_YUYV422     \
+        || (x)==PIX_FMT_UYVY422     \
+        || (x)==PIX_FMT_RGB32       \
+        || (x)==PIX_FMT_RGB32_1     \
+        || (x)==PIX_FMT_BGR24       \
+        || (x)==PIX_FMT_BGR565      \
+        || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR32       \
+        || (x)==PIX_FMT_BGR32_1     \
+        || (x)==PIX_FMT_RGB24       \
+        || (x)==PIX_FMT_RGB565      \
+        || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_GRAY8       \
+        || (x)==PIX_FMT_YUV410P     \
+        || (x)==PIX_FMT_YUV440P     \
+        || (x)==PIX_FMT_GRAY16BE    \
+        || (x)==PIX_FMT_GRAY16LE    \
+        || (x)==PIX_FMT_YUV444P     \
+        || (x)==PIX_FMT_YUV422P     \
+        || (x)==PIX_FMT_YUV411P     \
+        || (x)==PIX_FMT_PAL8        \
+        || (x)==PIX_FMT_BGR8        \
+        || (x)==PIX_FMT_RGB8        \
+        || (x)==PIX_FMT_BGR4_BYTE   \
+        || (x)==PIX_FMT_RGB4_BYTE   \
+        || (x)==PIX_FMT_YUV440P     /* repeats the PIX_FMT_YUV440P test above; harmless */ \
+        || (x)==PIX_FMT_MONOWHITE   \
+        || (x)==PIX_FMT_MONOBLACK   \
+    )
+#define isSupportedOut(x)   (       /* nonzero when x is one of the listed formats or satisfies isRGB()/isBGR(), which are defined outside this chunk */ \
+           (x)==PIX_FMT_YUV420P     \
+        || (x)==PIX_FMT_YUYV422     \
+        || (x)==PIX_FMT_UYVY422     \
+        || (x)==PIX_FMT_YUV444P     \
+        || (x)==PIX_FMT_YUV422P     \
+        || (x)==PIX_FMT_YUV411P     \
+        || isRGB(x)                 \
+        || isBGR(x)                 \
+        || (x)==PIX_FMT_NV12        \
+        || (x)==PIX_FMT_NV21        \
+        || (x)==PIX_FMT_GRAY16BE    \
+        || (x)==PIX_FMT_GRAY16LE    \
+        || (x)==PIX_FMT_GRAY8       \
+        || (x)==PIX_FMT_YUV410P     \
+        || (x)==PIX_FMT_YUV440P     \
+    )
+#define isPacked(x)         (       /* nonzero for PAL8, YUYV422, UYVY422 and anything isRGB()/isBGR() accepts */ \
+           (x)==PIX_FMT_PAL8        \
+        || (x)==PIX_FMT_YUYV422     \
+        || (x)==PIX_FMT_UYVY422     \
+        || isRGB(x)                 \
+        || isBGR(x)                 \
+    )
+#define usePal(x)           (       /* nonzero for PAL8 and the 8-bit / 4-bit-per-byte RGB and BGR formats listed here */ \
+           (x)==PIX_FMT_PAL8        \
+        || (x)==PIX_FMT_BGR4_BYTE   \
+        || (x)==PIX_FMT_RGB4_BYTE   \
+        || (x)==PIX_FMT_BGR8        \
+        || (x)==PIX_FMT_RGB8        \
+    )
+
+#define RGB2YUV_SHIFT 15 // number of fractional bits of the fixed-point coefficients below
+#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) // the *Y weights are scaled by 219/255 and rounded to integers
+#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5)) // the *U and *V weights are scaled by 224/255; negative ones are rounded on their magnitude
+#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
+
+extern const int32_t ff_yuv2rgb_coeffs[8][4];
+
+static const double rgb2yuv_table[8][9]={ // 8 sets of 9 RGB->YUV weights; NOTE(review): row order presumably follows the colorspace numbering of ff_yuv2rgb_coeffs - confirm
+    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
+    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
+    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
+    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
+    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
+    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
+    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
+    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M (0.5 and -0.116 are separate entries, as in every other row)
+};
+
+/*
+NOTES
+Special versions: fast Y 1:1 scaling (no interpolation in y direction)
+
+TODO
+more intelligent misalignment avoidance for the horizontal scaler
+write special vertical cubic upscale version
+optimize C code (YV12 / minmax)
+add support for packed pixel YUV input & output
+add support for Y8 output
+optimize BGR24 & BGR32
+add BGR4 output support
+write special BGR->BGR scaler
+*/
+
+#if ARCH_X86 && CONFIG_GPL // the 64-bit constants below are only compiled when both ARCH_X86 and CONFIG_GPL are nonzero
+DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL; // 0xF8 in every byte
+DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL; // 0xFC in every byte
+DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL; // 0x0010 in every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL; // 0x0002 in every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL; // bmXXXXXXXX: one digit per byte, most significant first; 1 = 0xFF, 0 = 0x00
+DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
+DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
+DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
+        0x0103010301030103LL,
+        0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
+        0x0602060206020602LL,
+        0x0004000400040004LL,};
+
+DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL; // bits 0-4 of every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL; // bits 5-10 of every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL; // bits 11-15 of every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL; // bits 0-4 of every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL; // bits 5-9 of every 16-bit word
+DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL; // bits 10-14 of every 16-bit word
+
+DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
+DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
+DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
+
+#ifdef FAST_BGR2YV12 // selects the short coefficient set; FAST_BGR2YV12 is defined near the top of this file
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
+#else
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
+#endif /* FAST_BGR2YV12 */
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL; // 16 in every byte
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; // 128 in every byte
+DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL; // 1 in every 16-bit word
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
+    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
+    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
+};
+
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+
+#endif /* ARCH_X86 && CONFIG_GPL */
+
+// clipping helper table for C implementations:
+static unsigned char clip_table[768];
+
+static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
+
+static const uint8_t  __attribute__((aligned(8))) dither_2x2_4[2][8]={ // 2x2 pattern {1,3 / 2,0} repeated across 8 columns; entries lie in 0..3
+{  1,   3,   1,   3,   1,   3,   1,   3, },
+{  2,   0,   2,   0,   2,   0,   2,   0, },
+};
+
+static const uint8_t  __attribute__((aligned(8))) dither_2x2_8[2][8]={ // 2x2 pattern {6,2 / 0,4} repeated across 8 columns; entries lie in 0..6
+{  6,   2,   6,   2,   6,   2,   6,   2, },
+{  0,   4,   0,   4,   0,   4,   0,   4, },
+};
+
+const uint8_t  __attribute__((aligned(8))) dither_8x8_32[8][8]={ // 8x8 threshold matrix with entries in 0..31; non-static, so visible to other files
+{ 17,   9,  23,  15,  16,   8,  22,  14, },
+{  5,  29,   3,  27,   4,  28,   2,  26, },
+{ 21,  13,  19,  11,  20,  12,  18,  10, },
+{  0,  24,   6,  30,   1,  25,   7,  31, },
+{ 16,   8,  22,  14,  17,   9,  23,  15, },
+{  4,  28,   2,  26,   5,  29,   3,  27, },
+{ 20,  12,  18,  10,  21,  13,  19,  11, },
+{  1,  25,   7,  31,   0,  24,   6,  30, },
+};
+
+#if 0
+const uint8_t  __attribute__((aligned(8))) dither_8x8_64[8][8]={
+{  0,  48,  12,  60,   3,  51,  15,  63, },
+{ 32,  16,  44,  28,  35,  19,  47,  31, },
+{  8,  56,   4,  52,  11,  59,   7,  55, },
+{ 40,  24,  36,  20,  43,  27,  39,  23, },
+{  2,  50,  14,  62,   1,  49,  13,  61, },
+{ 34,  18,  46,  30,  33,  17,  45,  29, },
+{ 10,  58,   6,  54,   9,  57,   5,  53, },
+{ 42,  26,  38,  22,  41,  25,  37,  21, },
+};
+#endif
+
+const uint8_t  __attribute__((aligned(8))) dither_8x8_73[8][8]={ // 8x8 threshold matrix with entries in 0..72; non-static, so visible to other files
+{  0,  55,  14,  68,   3,  58,  17,  72, },
+{ 37,  18,  50,  32,  40,  22,  54,  35, },
+{  9,  64,   5,  59,  13,  67,   8,  63, },
+{ 46,  27,  41,  23,  49,  31,  44,  26, },
+{  2,  57,  16,  71,   1,  56,  15,  70, },
+{ 39,  21,  52,  34,  38,  19,  51,  33, },
+{ 11,  66,   7,  62,  10,  65,   6,  60, },
+{ 48,  30,  43,  25,  47,  29,  42,  24, },
+};
+
+#if 0
+const uint8_t  __attribute__((aligned(8))) dither_8x8_128[8][8]={
+{ 68,  36,  92,  60,  66,  34,  90,  58, },
+{ 20, 116,  12, 108,  18, 114,  10, 106, },
+{ 84,  52,  76,  44,  82,  50,  74,  42, },
+{  0,  96,  24, 120,   6, 102,  30, 126, },
+{ 64,  32,  88,  56,  70,  38,  94,  62, },
+{ 16, 112,   8, 104,  22, 118,  14, 110, },
+{ 80,  48,  72,  40,  86,  54,  78,  46, },
+{  4, 100,  28, 124,   2,  98,  26, 122, },
+};
+#endif
+
+#if 1 // only this first variant is compiled; the #elif/#else alternatives below are inactive
+const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={ // entries lie in 0..217; the mono output macros in this file pick a row with y&7
+{117,  62, 158, 103, 113,  58, 155, 100, },
+{ 34, 199,  21, 186,  31, 196,  17, 182, },
+{144,  89, 131,  76, 141,  86, 127,  72, },
+{  0, 165,  41, 206,  10, 175,  52, 217, },
+{110,  55, 151,  96, 120,  65, 162, 107, },
+{ 28, 193,  14, 179,  38, 203,  24, 189, },
+{138,  83, 124,  69, 148,  93, 134,  79, },
+{  7, 172,  48, 213,   3, 168,  45, 210, },
+};
+#elif 1
+// tries to correct a gamma of 1.5
+const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{  0, 143,  18, 200,   2, 156,  25, 215, },
+{ 78,  28, 125,  64,  89,  36, 138,  74, },
+{ 10, 180,   3, 161,  16, 195,   8, 175, },
+{109,  51,  93,  38, 121,  60, 105,  47, },
+{  1, 152,  23, 210,   0, 147,  20, 205, },
+{ 85,  33, 134,  71,  81,  30, 130,  67, },
+{ 14, 190,   6, 171,  12, 185,   5, 166, },
+{117,  57, 101,  44, 113,  54,  97,  41, },
+};
+#elif 1
+// tries to correct a gamma of 2.0
+const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{  0, 124,   8, 193,   0, 140,  12, 213, },
+{ 55,  14, 104,  42,  66,  19, 119,  52, },
+{  3, 168,   1, 145,   6, 187,   3, 162, },
+{ 86,  31,  70,  21,  99,  39,  82,  28, },
+{  0, 134,  11, 206,   0, 129,   9, 200, },
+{ 62,  17, 114,  48,  58,  16, 109,  45, },
+{  5, 181,   2, 157,   4, 175,   1, 151, },
+{ 95,  36,  78,  26,  90,  34,  74,  24, },
+};
+#else
+// tries to correct a gamma of 2.5
+const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
+{  0, 107,   3, 187,   0, 125,   6, 212, },
+{ 39,   7,  86,  28,  49,  11, 102,  36, },
+{  1, 158,   0, 131,   3, 180,   1, 151, },
+{ 68,  19,  52,  12,  81,  25,  64,  17, },
+{  0, 119,   5, 203,   0, 113,   4, 195, },
+{ 45,   9,  96,  33,  42,   8,  91,  30, },
+{  2, 172,   1, 144,   2, 165,   0, 137, },
+{ 77,  23,  60,  15,  72,  21,  56,  14, },
+};
+#endif
+
+const char *sws_format_name(enum PixelFormat format) // maps a PixelFormat value to a constant, human-readable string; the result must not be freed
+{
+    switch (format) {
+        case PIX_FMT_YUV420P:
+            return "yuv420p";
+        case PIX_FMT_YUVA420P:
+            return "yuva420p";
+        case PIX_FMT_YUYV422:
+            return "yuyv422";
+        case PIX_FMT_RGB24:
+            return "rgb24";
+        case PIX_FMT_BGR24:
+            return "bgr24";
+        case PIX_FMT_YUV422P:
+            return "yuv422p";
+        case PIX_FMT_YUV444P:
+            return "yuv444p";
+        case PIX_FMT_RGB32:
+            return "rgb32";
+        case PIX_FMT_YUV410P:
+            return "yuv410p";
+        case PIX_FMT_YUV411P:
+            return "yuv411p";
+        case PIX_FMT_RGB565:
+            return "rgb565";
+        case PIX_FMT_RGB555:
+            return "rgb555";
+        case PIX_FMT_GRAY16BE:
+            return "gray16be";
+        case PIX_FMT_GRAY16LE:
+            return "gray16le";
+        case PIX_FMT_GRAY8:
+            return "gray8";
+        case PIX_FMT_MONOWHITE:
+            return "mono white";
+        case PIX_FMT_MONOBLACK:
+            return "mono black";
+        case PIX_FMT_PAL8:
+            return "Palette";
+        case PIX_FMT_YUVJ420P:
+            return "yuvj420p";
+        case PIX_FMT_YUVJ422P:
+            return "yuvj422p";
+        case PIX_FMT_YUVJ444P:
+            return "yuvj444p";
+        case PIX_FMT_XVMC_MPEG2_MC:
+            return "xvmc_mpeg2_mc";
+        case PIX_FMT_XVMC_MPEG2_IDCT:
+            return "xvmc_mpeg2_idct";
+        case PIX_FMT_UYVY422:
+            return "uyvy422";
+        case PIX_FMT_UYYVYY411:
+            return "uyyvyy411";
+        case PIX_FMT_RGB32_1:
+            return "rgb32x";
+        case PIX_FMT_BGR32_1:
+            return "bgr32x";
+        case PIX_FMT_BGR32:
+            return "bgr32";
+        case PIX_FMT_BGR565:
+            return "bgr565";
+        case PIX_FMT_BGR555:
+            return "bgr555";
+        case PIX_FMT_BGR8:
+            return "bgr8";
+        case PIX_FMT_BGR4:
+            return "bgr4";
+        case PIX_FMT_BGR4_BYTE:
+            return "bgr4 byte";
+        case PIX_FMT_RGB8:
+            return "rgb8";
+        case PIX_FMT_RGB4:
+            return "rgb4";
+        case PIX_FMT_RGB4_BYTE:
+            return "rgb4 byte";
+        case PIX_FMT_NV12:
+            return "nv12";
+        case PIX_FMT_NV21:
+            return "nv21";
+        case PIX_FMT_YUV440P:
+            return "yuv440p";
+        case PIX_FMT_VDPAU_H264:
+            return "vdpau_h264";
+        case PIX_FMT_VDPAU_MPEG1:
+            return "vdpau_mpeg1";
+        case PIX_FMT_VDPAU_MPEG2:
+            return "vdpau_mpeg2";
+        case PIX_FMT_VDPAU_WMV3:
+            return "vdpau_wmv3";
+        case PIX_FMT_VDPAU_VC1:
+            return "vdpau_vc1";
+        default: // any value without a case above
+            return "Unknown format";
+    }
+}
+
+static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+{
+    /* Plain C vertical filter: each output byte is the tap-weighted sum of the
+     * source lines, started at 1<<18 for rounding, shifted down by 19 and clipped to 8 bits.
+     * FIXME Optimize (just quickly written not optimized..) */
+    const int rounding = 1<<18;
+    int x, tap;
+
+    for (x = 0; x < dstW; x++) {
+        int luma = rounding;
+
+        for (tap = 0; tap < lumFilterSize; tap++)
+            luma += lumSrc[tap][x] * lumFilter[tap];
+        dest[x] = av_clip_uint8(luma >> 19);
+    }
+
+    if (!uDest) /* uDest == NULL: luma only */
+        return;
+
+    for (x = 0; x < chrDstW; x++) {
+        int cb = rounding, cr = rounding;
+
+        for (tap = 0; tap < chrFilterSize; tap++) {
+            cb += chrSrc[tap][x       ] * chrFilter[tap];
+            cr += chrSrc[tap][x + VOFW] * chrFilter[tap]; /* V samples are read VOFW entries after the U samples of the same line */
+        }
+        uDest[x] = av_clip_uint8(cb >> 19);
+        vDest[x] = av_clip_uint8(cr >> 19);
+    }
+}
+
+static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+{
+    /*
+     * Plain C vertical filter for semi-planar output: one luma line plus one
+     * line of interleaved chroma byte pairs written through uDest.
+     *
+     * Every sample is the tap-weighted sum of the source lines, started at
+     * 1<<18 for rounding, shifted down by 19 and clipped to 8 bits.
+     *
+     * For PIX_FMT_NV12 each chroma pair is stored as (U, V); for any other
+     * dstFormat it is stored as (V, U).
+     *
+     * FIXME Optimize (just quickly written not optimized..)
+     */
+    const int rounding = 1<<18;
+    /* byte offsets of U and V inside each interleaved chroma pair */
+    const int uPos = dstFormat == PIX_FMT_NV12 ? 0 : 1;
+    const int vPos = 1 - uPos;
+    int x;
+    int tap;
+
+    /* luma line */
+    for (x = 0; x < dstW; x++) {
+        int luma = rounding;
+
+        for (tap = 0; tap < lumFilterSize; tap++)
+            luma += lumSrc[tap][x] * lumFilter[tap];
+
+        dest[x] = av_clip_uint8(luma >> 19);
+    }
+
+    if (!uDest) /* uDest == NULL: luma only */
+        return;
+
+    /* interleaved chroma line */
+    for (x = 0; x < chrDstW; x++) {
+        int cb = rounding;
+        int cr = rounding;
+
+        for (tap = 0; tap < chrFilterSize; tap++) {
+            cb += chrSrc[tap][x       ] * chrFilter[tap];
+            cr += chrSrc[tap][x + VOFW] * chrFilter[tap];
+        }
+
+        uDest[2*x + uPos] = av_clip_uint8(cb >> 19);
+        uDest[2*x + vPos] = av_clip_uint8(cr >> 19);
+    }
+}
+
+#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) /* opens a loop over dstW/2 pixel pairs and leaves it open for the user to close: filters Y1, Y2, U, V (start 1<<18, >>19) without clipping; expects i, dstW, lumSrc, lumFilter, chrSrc, chrFilter and the filter sizes in scope */ \
+    for (i=0; i<(dstW>>1); i++){\
+        int j;\
+        int Y1 = 1<<18;\
+        int Y2 = 1<<18;\
+        int U  = 1<<18;\
+        int V  = 1<<18;\
+        type av_unused *r, *b, *g;\
+        const int i2= 2*i;\
+        \
+        for (j=0; j<lumFilterSize; j++)\
+        {\
+            Y1 += lumSrc[j][i2] * lumFilter[j];\
+            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
+        }\
+        for (j=0; j<chrFilterSize; j++)\
+        {\
+            U += chrSrc[j][i] * chrFilter[j];\
+            V += chrSrc[j][i+VOFW] * chrFilter[j];\
+        }\
+        Y1>>=19;\
+        Y2>>=19;\
+        U >>=19;\
+        V >>=19;\
+
+#define YSCALE_YUV_2_PACKEDX_C(type) /* the NOCLIP loop head plus clamping of Y1, Y2, U, V to 0..255; the clamp only runs when bit 8 of their OR is set; the loop stays open */ \
+        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
+        if ((Y1|Y2|U|V)&256)\
+        {\
+            if (Y1>255)   Y1=255; \
+            else if (Y1<0)Y1=0;   \
+            if (Y2>255)   Y2=255; \
+            else if (Y2<0)Y2=0;   \
+            if (U>255)    U=255;  \
+            else if (U<0) U=0;    \
+            if (V>255)    V=255;  \
+            else if (V<0) V=0;    \
+        }
+
+#define YSCALE_YUV_2_PACKEDX_FULL_C /* opens a per-pixel loop over dstW and leaves it open: filters Y, U, V and shifts them down by 10; U and V start at -128<<19; declares R, G, B for the user */ \
+    for (i=0; i<dstW; i++){\
+        int j;\
+        int Y = 0;\
+        int U = -128<<19;\
+        int V = -128<<19;\
+        int R,G,B;\
+        \
+        for (j=0; j<lumFilterSize; j++){\
+            Y += lumSrc[j][i     ] * lumFilter[j];\
+        }\
+        for (j=0; j<chrFilterSize; j++){\
+            U += chrSrc[j][i     ] * chrFilter[j];\
+            V += chrSrc[j][i+VOFW] * chrFilter[j];\
+        }\
+        Y >>=10;\
+        U >>=10;\
+        V >>=10;\
+
+#define YSCALE_YUV_2_RGBX_FULL_C(rnd) /* extends YSCALE_YUV_2_PACKEDX_FULL_C: turns Y, U, V into R, G, B with the c->yuv2rgb_* values, adds rnd, and clamps to 0..(256<<22)-1 when either of the top two bits of R|G|B is set; the loop stays open */ \
+    YSCALE_YUV_2_PACKEDX_FULL_C\
+        Y-= c->yuv2rgb_y_offset;\
+        Y*= c->yuv2rgb_y_coeff;\
+        Y+= rnd;\
+        R= Y + V*c->yuv2rgb_v2r_coeff;\
+        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
+        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
+        if ((R|G|B)&(0xC0000000)){\
+            if (R>=(256<<22))   R=(256<<22)-1; \
+            else if (R<0)R=0;   \
+            if (G>=(256<<22))   G=(256<<22)-1; \
+            else if (G<0)G=0;   \
+            if (B>=(256<<22))   B=(256<<22)-1; \
+            else if (B<0)B=0;   \
+        }\
+
+
+#define YSCALE_YUV_2_GRAY16_C /* opens a loop over dstW/2 pixel pairs and leaves it open: filters Y1 and Y2, shifts them down by 11 and clamps them to 0..65535 when bit 16 of the OR is set; U and V are declared but never filtered */ \
+    for (i=0; i<(dstW>>1); i++){\
+        int j;\
+        int Y1 = 1<<18;\
+        int Y2 = 1<<18;\
+        int U  = 1<<18;\
+        int V  = 1<<18;\
+        \
+        const int i2= 2*i;\
+        \
+        for (j=0; j<lumFilterSize; j++)\
+        {\
+            Y1 += lumSrc[j][i2] * lumFilter[j];\
+            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
+        }\
+        Y1>>=11;\
+        Y2>>=11;\
+        if ((Y1|Y2|U|V)&65536)\
+        {\
+            if (Y1>65535)   Y1=65535; \
+            else if (Y1<0)Y1=0;   \
+            if (Y2>65535)   Y2=65535; \
+            else if (Y2<0)Y2=0;   \
+        }
+
+#define YSCALE_YUV_2_RGBX_C(type) /* YSCALE_YUV_2_PACKEDX_C plus the r, g, b table pointers looked up from the filtered U and V; the loop stays open */ \
+    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
+    r = (type *)c->table_rV[V];   \
+    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
+    b = (type *)c->table_bU[U];   \
+
+#define YSCALE_YUV_2_PACKED2_C   /* opens a loop over pixel pairs and leaves it open: blends buf0/buf1 with yalpha1/yalpha and uvbuf0/uvbuf1 with uvalpha1/uvalpha, each shifted down by 19 */ \
+    for (i=0; i<(dstW>>1); i++){ \
+        const int i2= 2*i;       \
+        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
+        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
+        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
+        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
+
+#define YSCALE_YUV_2_GRAY16_2_C   /* opens a loop over pixel pairs and leaves it open: luma only, blended from buf0/buf1 with yalpha1/yalpha and shifted down by 11 */ \
+    for (i=0; i<(dstW>>1); i++){ \
+        const int i2= 2*i;       \
+        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
+        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
+
+#define YSCALE_YUV_2_RGB2_C(type) /* YSCALE_YUV_2_PACKED2_C plus r, g, b table pointers of the given type selected by U and V; the loop stays open */ \
+    YSCALE_YUV_2_PACKED2_C\
+    type *r, *b, *g;\
+    r = (type *)c->table_rV[V];\
+    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
+    b = (type *)c->table_bU[U];\
+
+#define YSCALE_YUV_2_PACKED1_C /* opens a loop over pixel pairs and leaves it open: no blending, Y comes from buf0 and U/V from uvbuf1, each shifted down by 7 */ \
+    for (i=0; i<(dstW>>1); i++){\
+        const int i2= 2*i;\
+        int Y1= buf0[i2  ]>>7;\
+        int Y2= buf0[i2+1]>>7;\
+        int U= (uvbuf1[i     ])>>7;\
+        int V= (uvbuf1[i+VOFW])>>7;\
+
+#define YSCALE_YUV_2_GRAY16_1_C /* opens a loop over pixel pairs and leaves it open: luma only, taken from buf0 and shifted left by 1 */ \
+    for (i=0; i<(dstW>>1); i++){\
+        const int i2= 2*i;\
+        int Y1= buf0[i2  ]<<1;\
+        int Y2= buf0[i2+1]<<1;\
+
+#define YSCALE_YUV_2_RGB1_C(type) /* YSCALE_YUV_2_PACKED1_C plus r, g, b table pointers of the given type selected by U and V; the loop stays open */ \
+    YSCALE_YUV_2_PACKED1_C\
+    type *r, *b, *g;\
+    r = (type *)c->table_rV[V];\
+    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
+    b = (type *)c->table_bU[U];\
+
+#define YSCALE_YUV_2_PACKED1B_C /* opens a loop over pixel pairs and leaves it open: Y comes from buf0 (>>7), U/V are the sum of uvbuf0 and uvbuf1 shifted down by 8 */ \
+    for (i=0; i<(dstW>>1); i++){\
+        const int i2= 2*i;\
+        int Y1= buf0[i2  ]>>7;\
+        int Y2= buf0[i2+1]>>7;\
+        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
+        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
+
+#define YSCALE_YUV_2_RGB1B_C(type) /* YSCALE_YUV_2_PACKED1B_C plus r, g, b table pointers of the given type selected by U and V; the loop stays open */ \
+    YSCALE_YUV_2_PACKED1B_C\
+    type *r, *b, *g;\
+    r = (type *)c->table_rV[V];\
+    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
+    b = (type *)c->table_bU[U];\
+
+#define YSCALE_YUV_2_MONO2_C /* complete loop: packs 8 blended, dithered luma samples into each output byte; every step doubles acc and adds the next table value, so the first sample ends up in the highest position; the byte is inverted unless dstFormat is PIX_FMT_MONOBLACK */ \
+    const uint8_t * const d128=dither_8x8_220[y&7];\
+    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
+    for (i=0; i<dstW-7; i+=8){\
+        int acc;\
+        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
+        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
+        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
+        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
+        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
+        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
+        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
+        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
+        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
+        dest++;\
+    }\
+
+
+#define YSCALE_YUV_2_MONOX_C \
+    const uint8_t * const d128=dither_8x8_220[y&7];\
+    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
+    int acc=0;\
+    for (i=0; i<dstW-1; i+=2){\
+        int j;\
+        int Y1=1<<18;\
+        int Y2=1<<18;\
+\
+        for (j=0; j<lumFilterSize; j++)\
+        {\
+            Y1 += lumSrc[j][i] * lumFilter[j];\
+            Y2 += lumSrc[j][i+1] * lumFilter[j];\
+        }\
+        Y1>>=19;\
+        Y2>>=19;\
+        if ((Y1|Y2)&256)\
+        {\
+            if (Y1>255)   Y1=255;\
+            else if (Y1<0)Y1=0;\
+            if (Y2>255)   Y2=255;\
+            else if (Y2<0)Y2=0;\
+        }\
+        acc+= acc + g[Y1+d128[(i+0)&7]];\
+        acc+= acc + g[Y2+d128[(i+1)&7]];\
+        if ((i&7)==6){\
+            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
+            dest++;\
+        }\
+    }
+
+
+#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
+    switch(c->dstFormat)\
+    {\
+    case PIX_FMT_RGB32:\
+    case PIX_FMT_BGR32:\
+    case PIX_FMT_RGB32_1:\
+    case PIX_FMT_BGR32_1:\
+        func(uint32_t)\
+            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
+            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
+        }                \
+        break;\
+    case PIX_FMT_RGB24:\
+        func(uint8_t)\
+            ((uint8_t*)dest)[0]= r[Y1];\
+            ((uint8_t*)dest)[1]= g[Y1];\
+            ((uint8_t*)dest)[2]= b[Y1];\
+            ((uint8_t*)dest)[3]= r[Y2];\
+            ((uint8_t*)dest)[4]= g[Y2];\
+            ((uint8_t*)dest)[5]= b[Y2];\
+            dest+=6;\
+        }\
+        break;\
+    case PIX_FMT_BGR24:\
+        func(uint8_t)\
+            ((uint8_t*)dest)[0]= b[Y1];\
+            ((uint8_t*)dest)[1]= g[Y1];\
+            ((uint8_t*)dest)[2]= r[Y1];\
+            ((uint8_t*)dest)[3]= b[Y2];\
+            ((uint8_t*)dest)[4]= g[Y2];\
+            ((uint8_t*)dest)[5]= r[Y2];\
+            dest+=6;\
+        }\
+        break;\
+    case PIX_FMT_RGB565:\
+    case PIX_FMT_BGR565:\
+        {\
+            const int dr1= dither_2x2_8[y&1    ][0];\
+            const int dg1= dither_2x2_4[y&1    ][0];\
+            const int db1= dither_2x2_8[(y&1)^1][0];\
+            const int dr2= dither_2x2_8[y&1    ][1];\
+            const int dg2= dither_2x2_4[y&1    ][1];\
+            const int db2= dither_2x2_8[(y&1)^1][1];\
+            func(uint16_t)\
+                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
+                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
+            }\
+        }\
+        break;\
+    case PIX_FMT_RGB555:\
+    case PIX_FMT_BGR555:\
+        {\
+            const int dr1= dither_2x2_8[y&1    ][0];\
+            const int dg1= dither_2x2_8[y&1    ][1];\
+            const int db1= dither_2x2_8[(y&1)^1][0];\
+            const int dr2= dither_2x2_8[y&1    ][1];\
+            const int dg2= dither_2x2_8[y&1    ][0];\
+            const int db2= dither_2x2_8[(y&1)^1][1];\
+            func(uint16_t)\
+                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
+                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
+            }\
+        }\
+        break;\
+    case PIX_FMT_RGB8:\
+    case PIX_FMT_BGR8:\
+        {\
+            const uint8_t * const d64= dither_8x8_73[y&7];\
+            const uint8_t * const d32= dither_8x8_32[y&7];\
+            func(uint8_t)\
+                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
+                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
+            }\
+        }\
+        break;\
+    case PIX_FMT_RGB4:\
+    case PIX_FMT_BGR4:\
+        {\
+            const uint8_t * const d64= dither_8x8_73 [y&7];\
+            const uint8_t * const d128=dither_8x8_220[y&7];\
+            func(uint8_t)\
+                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
+                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
+            }\
+        }\
+        break;\
+    case PIX_FMT_RGB4_BYTE:\
+    case PIX_FMT_BGR4_BYTE:\
+        {\
+            const uint8_t * const d64= dither_8x8_73 [y&7];\
+            const uint8_t * const d128=dither_8x8_220[y&7];\
+            func(uint8_t)\
+                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
+                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
+            }\
+        }\
+        break;\
+    case PIX_FMT_MONOBLACK:\
+    case PIX_FMT_MONOWHITE:\
+        {\
+            func_monoblack\
+        }\
+        break;\
+    case PIX_FMT_YUYV422:\
+        func2\
+            ((uint8_t*)dest)[2*i2+0]= Y1;\
+            ((uint8_t*)dest)[2*i2+1]= U;\
+            ((uint8_t*)dest)[2*i2+2]= Y2;\
+            ((uint8_t*)dest)[2*i2+3]= V;\
+        }                \
+        break;\
+    case PIX_FMT_UYVY422:\
+        func2\
+            ((uint8_t*)dest)[2*i2+0]= U;\
+            ((uint8_t*)dest)[2*i2+1]= Y1;\
+            ((uint8_t*)dest)[2*i2+2]= V;\
+            ((uint8_t*)dest)[2*i2+3]= Y2;\
+        }                \
+        break;\
+    case PIX_FMT_GRAY16BE:\
+        func_g16\
+            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
+            ((uint8_t*)dest)[2*i2+1]= Y1;\
+            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
+            ((uint8_t*)dest)[2*i2+3]= Y2;\
+        }                \
+        break;\
+    case PIX_FMT_GRAY16LE:\
+        func_g16\
+            ((uint8_t*)dest)[2*i2+0]= Y1;\
+            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
+            ((uint8_t*)dest)[2*i2+2]= Y2;\
+            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
+        }                \
+        break;\
+    }\
+
+
+static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                                  uint8_t *dest, int dstW, int y)
+{
+    int i;
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
+}
+
+static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                                    uint8_t *dest, int dstW, int y)
+{ /* Stores packed RGB/BGR pixels (with a constant 255 alpha byte) into dest for the 24/32-bit formats handled below. */
+    int i;
+    int step= fmt_depth(c->dstFormat)/8; // amount dest advances after each stored pixel
+    int aidx= 3;                         // index (relative to dest) of the alpha store; 3 = byte after the colour bytes
+    /* The case labels fall through on purpose: each entry point adjusts dest/aidx before the shared store loop. */
+    switch(c->dstFormat){
+    case PIX_FMT_ARGB:
+        dest++;   // colour bytes start one byte after the leading alpha byte
+        aidx= 0;  // the aidx-- below turns this into -1, i.e. the byte just before the colour bytes
+    case PIX_FMT_RGB24:
+        aidx--;   // RGB24 entry: aidx becomes 2, so the alpha store is overwritten by dest[2] below
+    case PIX_FMT_RGBA:
+        YSCALE_YUV_2_RGBX_FULL_C(1<<21) // defined outside this excerpt; presumably opens the per-pixel loop and declares R, G, B
+            dest[aidx]= 255;
+            dest[0]= R>>22;
+            dest[1]= G>>22;
+            dest[2]= B>>22;
+            dest+= step;
+        }
+        break;
+    case PIX_FMT_ABGR:
+        dest++;   // colour bytes start one byte after the leading alpha byte
+        aidx= 0;  // the aidx-- below turns this into -1, i.e. the byte just before the colour bytes
+    case PIX_FMT_BGR24:
+        aidx--;   // BGR24 entry: aidx becomes 2, so the alpha store is overwritten by dest[2] below
+    case PIX_FMT_BGRA:
+        YSCALE_YUV_2_RGBX_FULL_C(1<<21) // same macro as above, with the colour bytes stored in B, G, R order
+            dest[aidx]= 255;
+            dest[0]= B>>22;
+            dest[1]= G>>22;
+            dest[2]= R>>22;
+            dest+= step;
+        }
+        break;
+    default:
+        assert(0); // any other destination format is not supported by this function
+    }
+}
+
+//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
+//Plain C versions
+#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
+#define COMPILE_C
+#endif
+
+#if ARCH_PPC
+#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#undef COMPILE_C
+#define COMPILE_ALTIVEC
+#endif
+#endif //ARCH_PPC
+
+#if ARCH_X86
+
+#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#define COMPILE_MMX
+#endif
+
+#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#define COMPILE_MMX2
+#endif
+
+#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#define COMPILE_3DNOW
+#endif
+#endif //ARCH_X86
+
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_ALTIVEC
+#define HAVE_MMX 0
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_ALTIVEC 0
+
+#ifdef COMPILE_C
+#define RENAME(a) a ## _C
+#include "swscale_template.c"
+#endif
+
+#ifdef COMPILE_ALTIVEC
+#undef RENAME
+#undef HAVE_ALTIVEC
+#define HAVE_ALTIVEC 1
+#define RENAME(a) a ## _altivec
+#include "swscale_template.c"
+#endif
+
+#if ARCH_X86
+
+//x86 versions
+/*
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define ARCH_X86
+#define RENAME(a) a ## _X86
+#include "swscale_template.c"
+*/
+//MMX versions
+#ifdef COMPILE_MMX
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _MMX
+#include "swscale_template.c"
+#endif
+
+//MMX2 versions
+#ifdef COMPILE_MMX2
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _MMX2
+#include "swscale_template.c"
+#endif
+
+//3DNOW versions
+#ifdef COMPILE_3DNOW
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
+#define RENAME(a) a ## _3DNow
+#include "swscale_template.c"
+#endif
+
+#endif //ARCH_X86
+
+// minor note: the HAVE_xyz are messed up after this line so don't use them
+
+static double getSplineCoeff(double a, double b, double c, double d, double dist)
+{
+    /* dist<=1.0: returns a + b*dist + c*dist^2 + d*dist^3; otherwise recurses on dist-1.0 with the coefficients below. */
+    if (dist<=1.0)      return ((d*dist + c)*dist + b)*dist +a;
+    else                return getSplineCoeff(        0.0,
+                                             b+ 2.0*c + 3.0*d,
+                                                    c + 3.0*d,
+                                            -b- 3.0*c - 6.0*d,
+                                            dist-1.0);
+}
+
+static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
+                             int srcW, int dstW, int filterAlign, int one, int flags,
+                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
+{
+    int i;
+    int filterSize;
+    int filter2Size;
+    int minFilterSize;
+    int64_t *filter=NULL;
+    int64_t *filter2=NULL;
+    const int64_t fone= 1LL<<54;
+    int ret= -1;
+#if ARCH_X86
+    if (flags & SWS_CPU_CAPS_MMX)
+        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
+#endif
+
+    // NOTE: the +1 is for the MMX scaler which reads over the end
+    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
+
+    if (FFABS(xInc - 0x10000) <10) // unscaled
+    {
+        int i;
+        filterSize= 1;
+        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
+
+        for (i=0; i<dstW; i++)
+        {
+            filter[i*filterSize]= fone;
+            (*filterPos)[i]=i;
+        }
+
+    }
+    else if (flags&SWS_POINT) // lame looking point sampling mode
+    {
+        int i;
+        int xDstInSrc;
+        filterSize= 1;
+        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
+
+        xDstInSrc= xInc/2 - 0x8000;
+        for (i=0; i<dstW; i++)
+        {
+            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
+
+            (*filterPos)[i]= xx;
+            filter[i]= fone;
+            xDstInSrc+= xInc;
+        }
+    }
+    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
+    {
+        int i;
+        int xDstInSrc;
+        if      (flags&SWS_BICUBIC) filterSize= 4;
+        else if (flags&SWS_X      ) filterSize= 4;
+        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
+        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
+
+        xDstInSrc= xInc/2 - 0x8000;
+        for (i=0; i<dstW; i++)
+        {
+            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
+            int j;
+
+            (*filterPos)[i]= xx;
+                //bilinear upscale / linear interpolate / area averaging
+                for (j=0; j<filterSize; j++)
+                {
+                    int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
+                    if (coeff<0) coeff=0;
+                    filter[i*filterSize + j]= coeff;
+                    xx++;
+                }
+            xDstInSrc+= xInc;
+        }
+    }
+    else
+    {
+        int xDstInSrc;
+        int sizeFactor;
+
+        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
+        else if (flags&SWS_X)            sizeFactor=  8;
+        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
+        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
+        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
+        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
+        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
+        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
+        else {
+            sizeFactor= 0; //GCC warning killer
+            assert(0);
+        }
+
+        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
+        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
+
+        if (filterSize > srcW-2) filterSize=srcW-2;
+
+        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
+
+        xDstInSrc= xInc - 0x10000;
+        for (i=0; i<dstW; i++)
+        {
+            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
+            int j;
+            (*filterPos)[i]= xx;
+            for (j=0; j<filterSize; j++)
+            {
+                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
+                double floatd;
+                int64_t coeff;
+
+                if (xInc > 1<<16)
+                    d= d*dstW/srcW;
+                floatd= d * (1.0/(1<<30));
+
+                if (flags & SWS_BICUBIC)
+                {
+                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
+                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
+                    int64_t dd = ( d*d)>>30;
+                    int64_t ddd= (dd*d)>>30;
+
+                    if      (d < 1LL<<30)
+                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
+                    else if (d < 1LL<<31)
+                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
+                    else
+                        coeff=0.0;
+                    coeff *= fone>>(30+24);
+                }
+/*                else if (flags & SWS_X)
+                {
+                    double p= param ? param*0.01 : 0.3;
+                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
+                    coeff*= pow(2.0, - p*d*d);
+                }*/
+                else if (flags & SWS_X)
+                {
+                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
+                    double c;
+
+                    if (floatd<1.0)
+                        c = cos(floatd*PI);
+                    else
+                        c=-1.0;
+                    if (c<0.0)      c= -pow(-c, A);
+                    else            c=  pow( c, A);
+                    coeff= (c*0.5 + 0.5)*fone;
+                }
+                else if (flags & SWS_AREA)
+                {
+                    int64_t d2= d - (1<<29);
+                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
+                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
+                    else coeff=0.0;
+                    coeff *= fone>>(30+16);
+                }
+                else if (flags & SWS_GAUSS)
+                {
+                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
+                }
+                else if (flags & SWS_SINC)
+                {
+                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
+                }
+                else if (flags & SWS_LANCZOS)
+                {
+                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
+                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
+                    if (floatd>p) coeff=0;
+                }
+                else if (flags & SWS_BILINEAR)
+                {
+                    coeff= (1<<30) - d;
+                    if (coeff<0) coeff=0;
+                    coeff *= fone >> 30;
+                }
+                else if (flags & SWS_SPLINE)
+                {
+                    double p=-2.196152422706632;
+                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
+                }
+                else {
+                    coeff= 0.0; //GCC warning killer
+                    assert(0);
+                }
+
+                filter[i*filterSize + j]= coeff;
+                xx++;
+            }
+            xDstInSrc+= 2*xInc;
+        }
+    }
+
+    /* apply src & dst Filter to filter -> filter2
+       av_free(filter);
+    */
+    assert(filterSize>0);
+    filter2Size= filterSize;
+    if (srcFilter) filter2Size+= srcFilter->length - 1;
+    if (dstFilter) filter2Size+= dstFilter->length - 1;
+    assert(filter2Size>0);
+    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
+
+    for (i=0; i<dstW; i++)
+    {
+        int j, k;
+
+        if(srcFilter){
+            for (k=0; k<srcFilter->length; k++){
+                for (j=0; j<filterSize; j++)
+                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
+            }
+        }else{
+            for (j=0; j<filterSize; j++)
+                filter2[i*filter2Size + j]= filter[i*filterSize + j];
+        }
+        //FIXME dstFilter
+
+        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
+    }
+    av_freep(&filter);
+
+    /* try to reduce the filter-size (step1 find size and shift left) */
+    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
+    minFilterSize= 0;
+    for (i=dstW-1; i>=0; i--)
+    {
+        int min= filter2Size;
+        int j;
+        int64_t cutOff=0.0;
+
+        /* get rid of near zero elements on the left by shifting left */
+        for (j=0; j<filter2Size; j++)
+        {
+            int k;
+            cutOff += FFABS(filter2[i*filter2Size]);
+
+            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
+
+            /* preserve monotonicity because the core can't handle the filter otherwise */
+            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
+
+            // move filter coefficients left
+            for (k=1; k<filter2Size; k++)
+                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
+            filter2[i*filter2Size + k - 1]= 0;
+            (*filterPos)[i]++;
+        }
+
+        cutOff=0;
+        /* count near zeros on the right */
+        for (j=filter2Size-1; j>0; j--)
+        {
+            cutOff += FFABS(filter2[i*filter2Size + j]);
+
+            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
+            min--;
+        }
+
+        if (min>minFilterSize) minFilterSize= min;
+    }
+
+    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+        // we can handle the special case 4,
+        // so we don't want to go to the full 8
+        if (minFilterSize < 5)
+            filterAlign = 4;
+
+        // We really don't want to waste our time
+        // doing useless computation, so fall back on
+        // the scalar C code for very small filters.
+        // Vectorizing is worth it only if you have a
+        // decent-sized vector.
+        if (minFilterSize < 3)
+            filterAlign = 1;
+    }
+
+    if (flags & SWS_CPU_CAPS_MMX) {
+        // special case for unscaled vertical filtering
+        if (minFilterSize == 1 && filterAlign == 2)
+            filterAlign= 1;
+    }
+
+    assert(minFilterSize > 0);
+    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
+    assert(filterSize > 0);
+    filter= av_malloc(filterSize*dstW*sizeof(*filter));
+    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
+        goto error;
+    *outFilterSize= filterSize;
+
+    if (flags&SWS_PRINT_INFO)
+        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
+    /* try to reduce the filter-size (step2 reduce it) */
+    for (i=0; i<dstW; i++)
+    {
+        int j;
+
+        for (j=0; j<filterSize; j++)
+        {
+            if (j>=filter2Size) filter[i*filterSize + j]= 0;
+            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
+            if((flags & SWS_BITEXACT) && j>=minFilterSize)
+                filter[i*filterSize + j]= 0;
+        }
+    }
+
+
+    //FIXME try to align filterPos if possible
+
+    //fix borders
+    for (i=0; i<dstW; i++)
+    {
+        int j;
+        if ((*filterPos)[i] < 0)
+        {
+            // move filter coefficients left to compensate for filterPos
+            for (j=1; j<filterSize; j++)
+            {
+                int left= FFMAX(j + (*filterPos)[i], 0);
+                filter[i*filterSize + left] += filter[i*filterSize + j];
+                filter[i*filterSize + j]=0;
+            }
+            (*filterPos)[i]= 0;
+        }
+
+        if ((*filterPos)[i] + filterSize > srcW)
+        {
+            int shift= (*filterPos)[i] + filterSize - srcW;
+            // move filter coefficients right to compensate for filterPos
+            for (j=filterSize-2; j>=0; j--)
+            {
+                int right= FFMIN(j + shift, filterSize-1);
+                filter[i*filterSize +right] += filter[i*filterSize +j];
+                filter[i*filterSize +j]=0;
+            }
+            (*filterPos)[i]= srcW - filterSize;
+        }
+    }
+
+    // Note the +1 is for the MMX scaler which reads over the end
+    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
+    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
+
+    /* normalize & store in outFilter */
+    for (i=0; i<dstW; i++)
+    {
+        int j;
+        int64_t error=0;
+        int64_t sum=0;
+
+        for (j=0; j<filterSize; j++)
+        {
+            sum+= filter[i*filterSize + j];
+        }
+        sum= (sum + one/2)/ one;
+        for (j=0; j<*outFilterSize; j++)
+        {
+            int64_t v= filter[i*filterSize + j] + error;
+            int intV= ROUNDED_DIV(v, sum);
+            (*outFilter)[i*(*outFilterSize) + j]= intV;
+            error= v - intV*sum;
+        }
+    }
+
+    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
+    for (i=0; i<*outFilterSize; i++)
+    {
+        int j= dstW*(*outFilterSize);
+        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
+    }
+
+    ret=0;
+error:
+    av_free(filter);
+    av_free(filter2);
+    return ret;
+}
+
+#ifdef COMPILE_MMX2
+static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
+{
+    uint8_t *fragmentA;
+    long imm8OfPShufW1A;
+    long imm8OfPShufW2A;
+    long fragmentLengthA;
+    uint8_t *fragmentB;
+    long imm8OfPShufW1B;
+    long imm8OfPShufW2B;
+    long fragmentLengthB;
+    int fragmentPos;
+
+    int xpos, i;
+
+    // create an optimized horizontal scaling routine
+
+    //code fragment
+
+    __asm__ volatile(
+        "jmp                         9f                 \n\t"
+    // Begin
+        "0:                                             \n\t"
+        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
+        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
+        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
+        "punpcklbw                %%mm7, %%mm1          \n\t"
+        "punpcklbw                %%mm7, %%mm0          \n\t"
+        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
+        "1:                                             \n\t"
+        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
+        "2:                                             \n\t"
+        "psubw                    %%mm1, %%mm0          \n\t"
+        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
+        "pmullw                   %%mm3, %%mm0          \n\t"
+        "psllw                       $7, %%mm1          \n\t"
+        "paddw                    %%mm1, %%mm0          \n\t"
+
+        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
+
+        "add                         $8, %%"REG_a"      \n\t"
+    // End
+        "9:                                             \n\t"
+//        "int $3                                         \n\t"
+        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
+        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
+        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
+        "dec                         %1                 \n\t"
+        "dec                         %2                 \n\t"
+        "sub                         %0, %1             \n\t"
+        "sub                         %0, %2             \n\t"
+        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
+        "sub                         %0, %3             \n\t"
+
+
+        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
+        "=r" (fragmentLengthA)
+    );
+
+    __asm__ volatile(
+        "jmp                         9f                 \n\t"
+    // Begin
+        "0:                                             \n\t"
+        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
+        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
+        "punpcklbw                %%mm7, %%mm0          \n\t"
+        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
+        "1:                                             \n\t"
+        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
+        "2:                                             \n\t"
+        "psubw                    %%mm1, %%mm0          \n\t"
+        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
+        "pmullw                   %%mm3, %%mm0          \n\t"
+        "psllw                       $7, %%mm1          \n\t"
+        "paddw                    %%mm1, %%mm0          \n\t"
+
+        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
+
+        "add                         $8, %%"REG_a"      \n\t"
+    // End
+        "9:                                             \n\t"
+//        "int                       $3                   \n\t"
+        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
+        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
+        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
+        "dec                         %1                 \n\t"
+        "dec                         %2                 \n\t"
+        "sub                         %0, %1             \n\t"
+        "sub                         %0, %2             \n\t"
+        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
+        "sub                         %0, %3             \n\t"
+
+
+        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
+        "=r" (fragmentLengthB)
+    );
+
+    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
+    fragmentPos=0;
+
+    for (i=0; i<dstW/numSplits; i++)
+    {
+        int xx=xpos>>16;
+
+        if ((i&3) == 0)
+        {
+            int a=0;
+            int b=((xpos+xInc)>>16) - xx;
+            int c=((xpos+xInc*2)>>16) - xx;
+            int d=((xpos+xInc*3)>>16) - xx;
+
+            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
+            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
+            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
+            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
+            filterPos[i/2]= xx;
+
+            if (d+1<4)
+            {
+                int maxShift= 3-(d+1);
+                int shift=0;
+
+                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
+
+                funnyCode[fragmentPos + imm8OfPShufW1B]=
+                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
+                funnyCode[fragmentPos + imm8OfPShufW2B]=
+                    a | (b<<2) | (c<<4) | (d<<6);
+
+                if (i+3>=dstW) shift=maxShift; //avoid overread
+                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
+
+                if (shift && i>=shift)
+                {
+                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
+                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
+                    filterPos[i/2]-=shift;
+                }
+
+                fragmentPos+= fragmentLengthB;
+            }
+            else
+            {
+                int maxShift= 3-d;
+                int shift=0;
+
+                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
+
+                funnyCode[fragmentPos + imm8OfPShufW1A]=
+                funnyCode[fragmentPos + imm8OfPShufW2A]=
+                    a | (b<<2) | (c<<4) | (d<<6);
+
+                if (i+4>=dstW) shift=maxShift; //avoid overread
+                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
+
+                if (shift && i>=shift)
+                {
+                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
+                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
+                    filterPos[i/2]-=shift;
+                }
+
+                fragmentPos+= fragmentLengthA;
+            }
+
+            funnyCode[fragmentPos]= RET;
+        }
+        xpos+=xInc;
+    }
+    filterPos[i/2]= xpos>>16; // needed to jump to the next part
+}
+#endif /* COMPILE_MMX2 */
+
+static void globalInit(void){
+    // Fill clip_table: entry i holds (i-256) clipped by av_clip_uint8, for i = 0..767.
+    int i;
+    for (i=0; i<768; i++){
+        int c= av_clip_uint8(i-256);
+        clip_table[i]=c;
+    }
+}
+
+static SwsFunc getSwsFunc(int flags){
+
+#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
+#if ARCH_X86
+    // ordered per speed fastest first
+    if (flags & SWS_CPU_CAPS_MMX2)
+        return swScale_MMX2;
+    else if (flags & SWS_CPU_CAPS_3DNOW)
+        return swScale_3DNow;
+    else if (flags & SWS_CPU_CAPS_MMX)
+        return swScale_MMX;
+    else
+        return swScale_C;
+
+#else
+#if ARCH_PPC
+    if (flags & SWS_CPU_CAPS_ALTIVEC)
+        return swScale_altivec;
+    else
+        return swScale_C;
+#endif
+    return swScale_C;
+#endif /* ARCH_X86 */
+#else //RUNTIME_CPUDETECT
+#if   HAVE_MMX2
+    return swScale_MMX2;
+#elif HAVE_AMD3DNOW
+    return swScale_3DNow;
+#elif HAVE_MMX
+    return swScale_MMX;
+#elif HAVE_ALTIVEC
+    return swScale_altivec;
+#else
+    return swScale_C;
+#endif
+#endif //!RUNTIME_CPUDETECT
+}
+
+static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; // first destination row of this slice in plane 0
+    /* Copy the Y plane (src[0] -> dstParam[0]); returns srcSliceH, the number of rows handled. */
+    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
+        memcpy(dst, src[0], srcSliceH*dstStride[0]); // identical positive strides: copy the slice in one call
+    else
+    {
+        int i;
+        uint8_t *srcPtr= src[0];
+        uint8_t *dstPtr= dst;
+        for (i=0; i<srcSliceH; i++) // strides differ: copy c->srcW bytes per row
+        {
+            memcpy(dstPtr, srcPtr, c->srcW);
+            srcPtr+= srcStride[0];
+            dstPtr+= dstStride[0];
+        }
+    }
+    dst = dstParam[1] + dstStride[1]*srcSliceY/2; // interleaved chroma goes to plane 1, at half the row offset
+    if (c->dstFormat == PIX_FMT_NV12)
+        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]); // dst is plane 1, so use its stride
+    else
+        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]); // other format: source planes swapped
+
+    return srcSliceH;
+}
+
+/**
+ * Unscaled wrapper: hands one slice to yv12toyuy2(), writing from row
+ * srcSliceY of the first destination plane. Only srcStride[0] and
+ * srcStride[1] are forwarded as source strides.
+ * @return srcSliceH
+ */
+static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    const int outStride = dstStride[0];
+    uint8_t *const outRow = dstParam[0] + outStride*srcSliceY;
+
+    yv12toyuy2(src[0], src[1], src[2], outRow,
+               c->srcW, srcSliceH,
+               srcStride[0], srcStride[1], outStride);
+    return srcSliceH;
+}
+
+/**
+ * Unscaled wrapper: hands one slice to yv12touyvy(), writing from row
+ * srcSliceY of the first destination plane. Only srcStride[0] and
+ * srcStride[1] are forwarded as source strides.
+ * @return srcSliceH
+ */
+static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    const int outStride = dstStride[0];
+    uint8_t *const outRow = dstParam[0] + outStride*srcSliceY;
+
+    yv12touyvy(src[0], src[1], src[2], outRow,
+               c->srcW, srcSliceH,
+               srcStride[0], srcStride[1], outStride);
+    return srcSliceH;
+}
+
+/**
+ * Unscaled wrapper: hands one slice to yuv422ptoyuy2(), writing from row
+ * srcSliceY of the first destination plane. Only srcStride[0] and
+ * srcStride[1] are forwarded as source strides.
+ * @return srcSliceH
+ */
+static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    const int outStride = dstStride[0];
+    uint8_t *const outRow = dstParam[0] + outStride*srcSliceY;
+
+    yuv422ptoyuy2(src[0], src[1], src[2], outRow,
+                  c->srcW, srcSliceH,
+                  srcStride[0], srcStride[1], outStride);
+    return srcSliceH;
+}
+
+/**
+ * Unscaled wrapper: hands one slice to yuv422ptouyvy(), writing from row
+ * srcSliceY of the first destination plane. Only srcStride[0] and
+ * srcStride[1] are forwarded as source strides.
+ * @return srcSliceH
+ */
+static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    const int outStride = dstStride[0];
+    uint8_t *const outRow = dstParam[0] + outStride*srcSliceY;
+
+    yuv422ptouyvy(src[0], src[1], src[2], outRow,
+                  c->srcW, srcSliceH,
+                  srcStride[0], srcStride[1], outStride);
+    return srcSliceH;
+}
+
+/**
+ * Unscaled conversion of one slice from a palettized source to packed
+ * 24 or 32 bit RGB/BGR, using the palette stored in c->pal_rgb.
+ *
+ * An error is logged when the source format does not use a palette or when
+ * no converter exists for the destination format; in the latter case nothing
+ * is written to dst.
+ *
+ * @param c          context; srcFormat, dstFormat, srcW and pal_rgb are read
+ * @param src        source planes; only src[0] is read
+ * @param srcStride  byte stride of the source plane
+ * @param srcSliceY  first destination row of the slice
+ * @param srcSliceH  number of rows in the slice
+ * @param dst        destination planes; only dst[0] is written
+ * @param dstStride  byte stride of the destination plane
+ * @return srcSliceH
+ */
+static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                          int srcSliceH, uint8_t* dst[], int dstStride[]){
+    const enum PixelFormat srcFormat= c->srcFormat;
+    const enum PixelFormat dstFormat= c->dstFormat;
+    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
+                 const uint8_t *palette)=NULL;
+    int i;
+    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
+    uint8_t *srcPtr= src[0];
+
+    if (!usePal(srcFormat))
+        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+               sws_format_name(srcFormat), sws_format_name(dstFormat));
+
+    switch(dstFormat){
+    case PIX_FMT_RGB32  :
+    case PIX_FMT_BGR32  :
+    case PIX_FMT_BGR32_1:
+    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
+    case PIX_FMT_RGB24  :
+    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
+    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
+    }
+
+    /* No converter was selected: the error has been logged above, and calling
+     * through the NULL pointer would crash. */
+    if (!conv)
+        return srcSliceH;
+
+    for (i=0; i<srcSliceH; i++) {
+        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
+        srcPtr+= srcStride[0];
+        dstPtr+= dstStride[0];
+    }
+
+    return srcSliceH;
+}
+
+/**
+ * {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32}
+ *
+ * Unscaled packed RGB/BGR conversion of one slice. The converter is chosen
+ * from the source/destination depths and from whether both formats share
+ * the same component order. If no converter matches, an error is logged and
+ * nothing is written.
+ * @return srcSliceH
+ */
+static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                          int srcSliceH, uint8_t* dst[], int dstStride[]){
+    const enum PixelFormat inFmt = c->srcFormat;
+    const enum PixelFormat outFmt= c->dstFormat;
+    const int inBytes = (fmt_depth(inFmt ) + 7) >> 3;
+    const int outBytes= (fmt_depth(outFmt) + 7) >> 3;
+    /* depth>>2 yields the id nibbles: 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8;
+     * source id in the low nibble, destination id in the high nibble */
+    const int selector= (fmt_depth(inFmt) >> 2) | ((fmt_depth(outFmt) >> 2) << 4);
+    void (*convert)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
+    uint8_t *in, *out;
+    int row;
+
+    if (  (isBGR(inFmt) && isBGR(outFmt))
+       || (isRGB(inFmt) && isRGB(outFmt))){
+        /* same component order on both sides */
+        switch(selector){
+        case 0x34: convert= rgb16to15; break;
+        case 0x36: convert= rgb24to15; break;
+        case 0x38: convert= rgb32to15; break;
+        case 0x43: convert= rgb15to16; break;
+        case 0x46: convert= rgb24to16; break;
+        case 0x48: convert= rgb32to16; break;
+        case 0x63: convert= rgb15to24; break;
+        case 0x64: convert= rgb16to24; break;
+        case 0x68: convert= rgb32to24; break;
+        case 0x83: convert= rgb15to32; break;
+        case 0x84: convert= rgb16to32; break;
+        case 0x86: convert= rgb24to32; break;
+        default:
+            av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+                   sws_format_name(inFmt), sws_format_name(outFmt));
+            break;
+        }
+    }else if (  (isBGR(inFmt) && isRGB(outFmt))
+             || (isRGB(inFmt) && isBGR(outFmt))){
+        /* component order differs between source and destination */
+        switch(selector){
+        case 0x33: convert= rgb15tobgr15; break;
+        case 0x34: convert= rgb16tobgr15; break;
+        case 0x36: convert= rgb24tobgr15; break;
+        case 0x38: convert= rgb32tobgr15; break;
+        case 0x43: convert= rgb15tobgr16; break;
+        case 0x44: convert= rgb16tobgr16; break;
+        case 0x46: convert= rgb24tobgr16; break;
+        case 0x48: convert= rgb32tobgr16; break;
+        case 0x63: convert= rgb15tobgr24; break;
+        case 0x64: convert= rgb16tobgr24; break;
+        case 0x66: convert= rgb24tobgr24; break;
+        case 0x68: convert= rgb32tobgr24; break;
+        case 0x83: convert= rgb15tobgr32; break;
+        case 0x84: convert= rgb16tobgr32; break;
+        case 0x86: convert= rgb24tobgr32; break;
+        case 0x88: convert= rgb32tobgr32; break;
+        default:
+            av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+                   sws_format_name(inFmt), sws_format_name(outFmt));
+            break;
+        }
+    }else{
+        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
+               sws_format_name(inFmt), sws_format_name(outFmt));
+    }
+
+    if (!convert)
+        return srcSliceH;
+
+    in = src[0];
+    if (inFmt == PIX_FMT_RGB32_1 || inFmt == PIX_FMT_BGR32_1)
+        in += ALT32_CORR;
+    out= dst[0] + dstStride[0]*srcSliceY;
+
+    /* matching row pitch on both sides: convert the whole slice in one call */
+    if (dstStride[0]*inBytes == srcStride[0]*outBytes && srcStride[0] > 0) {
+        convert(in, out, srcSliceH*srcStride[0]);
+        return srcSliceH;
+    }
+
+    for (row=0; row<srcSliceH; row++) {
+        convert(in, out, c->srcW*inBytes);
+        in += srcStride[0];
+        out+= dstStride[0];
+    }
+    return srcSliceH;
+}
+
+/**
+ * Unscaled wrapper: hands one slice to rgb24toyv12(). The first destination
+ * plane is addressed at row srcSliceY, the second and third at row
+ * srcSliceY>>1. Only dstStride[0] and dstStride[1] are forwarded.
+ * @return srcSliceH
+ */
+static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dst[], int dstStride[]){
+    const int halfRow = srcSliceY>>1;
+    uint8_t *const plane0 = dst[0] + srcSliceY*dstStride[0];
+    uint8_t *const plane1 = dst[1] + halfRow  *dstStride[1];
+    uint8_t *const plane2 = dst[2] + halfRow  *dstStride[2];
+
+    rgb24toyv12(src[0], plane0, plane1, plane2,
+                c->srcW, srcSliceH,
+                dstStride[0], dstStride[1], srcStride[0]);
+    return srcSliceH;
+}
+
+/**
+ * Unscaled wrapper: copies the first plane of the slice and passes the two
+ * remaining planes through planar2x() (always from the start of the plane,
+ * with c->chrSrcW x c->chrSrcH as size). When the destination format is not
+ * PIX_FMT_YUV420P the two output planes are exchanged.
+ * @return srcSliceH
+ */
+static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                             int srcSliceH, uint8_t* dst[], int dstStride[]){
+    uint8_t *const lumaOut = dst[0] + srcSliceY*dstStride[0];
+    const int swapPlanes = c->dstFormat!=PIX_FMT_YUV420P;
+    const int first  = swapPlanes ? 2 : 1;
+    const int second = swapPlanes ? 1 : 2;
+
+    /* copy Y */
+    if (srcStride[0]==dstStride[0] && srcStride[0] > 0) {
+        memcpy(lumaOut, src[0], srcStride[0]*srcSliceH);
+    } else {
+        const uint8_t *in = src[0];
+        uint8_t *out = lumaOut;
+        int rowsLeft;
+
+        for (rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--) {
+            memcpy(out, in, c->srcW);
+            in  += srcStride[0];
+            out += dstStride[0];
+        }
+    }
+
+    planar2x(src[1], dst[first ], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[first ]);
+    planar2x(src[2], dst[second], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[second]);
+    return srcSliceH;
+}
+
+/**
+ * Unscaled copy of one slice of a packed format (assumes nearly identical
+ * source and destination formats).
+ *
+ * With equal positive strides the slice is copied in one memcpy(); otherwise
+ * it is copied row by row, using the largest multiple of c->srcW that fits
+ * into both strides as the row length.
+ * @return srcSliceH
+ */
+static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                      int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    uint8_t *out = dst[0] + dstStride[0]*srcSliceY;
+    uint8_t *in  = src[0];
+    int absDst, absSrc, limit;
+    int rowBytes;
+    int row;
+
+    if (dstStride[0]==srcStride[0] && srcStride[0] > 0) {
+        memcpy(out, in, srcSliceH*dstStride[0]);
+        return srcSliceH;
+    }
+
+    /* universal length finder: grow in steps of srcW while it still fits
+     * into the smaller of the two absolute strides */
+    absDst = FFABS(dstStride[0]);
+    absSrc = FFABS(srcStride[0]);
+    limit  = absDst < absSrc ? absDst : absSrc;
+    for (rowBytes = 0; rowBytes + c->srcW <= limit; rowBytes += c->srcW)
+        ;
+    assert(rowBytes!=0);
+
+    for (row = 0; row < srcSliceH; row++) {
+        memcpy(out, in, rowBytes);
+        in  += srcStride[0];
+        out += dstStride[0];
+    }
+    return srcSliceH;
+}
+
+/**
+ * Unscaled copy of one slice between planar formats.
+ *
+ * Plane 0 is always copied. For planes 1 and 2: if either format is gray
+ * there is nothing to copy, and the plane is filled with 128 when the
+ * destination is not gray; otherwise the plane is copied using the
+ * destination chroma subsampling to derive its width, first row and height.
+ *
+ * @param c          context; srcW, srcFormat, dstFormat and the destination
+ *                   chroma subsample shifts are read
+ * @param src        source planes
+ * @param srcStride  byte stride of each source plane
+ * @param srcSliceY  first row of the slice (in plane 0 rows)
+ * @param srcSliceH  number of rows in the slice (in plane 0 rows)
+ * @param dst        destination planes
+ * @param dstStride  byte stride of each destination plane
+ * @return srcSliceH
+ */
+static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                      int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    int plane;
+    for (plane=0; plane<3; plane++)
+    {
+        /* -((-x)>>s) rounds the subsampled value toward +inf */
+        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
+        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
+        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
+
+        if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
+        {
+            if (!isGray(c->dstFormat))
+            {
+                /* Fill only the rows of this slice, one row at a time: a
+                 * single memset of dstStride*height from the plane start
+                 * would hit the wrong rows for slices with y>0 and would get
+                 * a negative size with a negative stride. */
+                int i;
+                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
+                for (i=0; i<height; i++)
+                {
+                    memset(dstPtr, 128, length);
+                    dstPtr+= dstStride[plane];
+                }
+            }
+        }
+        else
+        {
+            if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
+                memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
+            else
+            {
+                int i;
+                uint8_t *srcPtr= src[plane];
+                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
+                for (i=0; i<height; i++)
+                {
+                    memcpy(dstPtr, srcPtr, length);
+                    srcPtr+= srcStride[plane];
+                    dstPtr+= dstStride[plane];
+                }
+            }
+        }
+    }
+    return srcSliceH;
+}
+
+/**
+ * Unscaled conversion of one slice from 16 bit gray to 8 bit gray or to the
+ * first plane of a non-gray destination.
+ *
+ * One byte of every 16 bit source sample is kept: the second byte for
+ * PIX_FMT_GRAY16LE, the first byte otherwise. When the destination is not
+ * gray, planes 1 and 2 of the slice are filled with 128.
+ *
+ * @param c          context; srcW, srcFormat, dstFormat and the destination
+ *                   chroma subsample shifts are read
+ * @param src        source planes; only src[0] is read
+ * @param srcStride  byte stride of the source plane
+ * @param srcSliceY  first row of the slice
+ * @param srcSliceH  number of rows in the slice
+ * @param dst        destination planes
+ * @param dstStride  byte stride of each destination plane
+ * @return srcSliceH
+ */
+static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                        int srcSliceH, uint8_t* dst[], int dstStride[]){
+
+    int length= c->srcW;
+    int y=      srcSliceY;
+    int height= srcSliceH;
+    int i, j;
+    uint8_t *srcPtr= src[0];
+    uint8_t *dstPtr= dst[0] + dstStride[0]*y;
+
+    if (!isGray(c->dstFormat)){
+        /* Fill only the chroma rows belonging to this slice, row by row; a
+         * single memset of dstStride*height from the plane start would hit
+         * the wrong rows for slices with srcSliceY>0 and would get a
+         * negative size with a negative stride.
+         * -((-x)>>s) rounds the subsampled value toward +inf. */
+        int chrLength= -((-c->srcW  )>>c->chrDstHSubSample);
+        int chrY=      -((-srcSliceY)>>c->chrDstVSubSample);
+        int chrHeight= -((-srcSliceH)>>c->chrDstVSubSample);
+        int plane;
+        for (plane=1; plane<3; plane++)
+        {
+            uint8_t *chrPtr= dst[plane] + dstStride[plane]*chrY;
+            for (i=0; i<chrHeight; i++)
+            {
+                memset(chrPtr, 128, chrLength);
+                chrPtr+= dstStride[plane];
+            }
+        }
+    }
+    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++; /* select the second byte of each sample */
+    for (i=0; i<height; i++)
+    {
+        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
+        srcPtr+= srcStride[0];
+        dstPtr+= dstStride[0];
+    }
+    return srcSliceH;
+}
+
+/**
+ * Unscaled conversion of one slice from 8 bit samples to 16 bit gray: every
+ * source byte of plane 0 is written twice, to both bytes of the output
+ * sample.
+ * @return srcSliceH
+ */
+static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                        int srcSliceH, uint8_t* dst[], int dstStride[]){
+    const int width = c->srcW;
+    uint8_t *inRow  = src[0];
+    uint8_t *outRow = dst[0] + dstStride[0]*srcSliceY;
+    int row;
+
+    for (row = 0; row < srcSliceH; row++) {
+        uint8_t *out = outRow;
+        int x;
+
+        for (x = 0; x < width; x++) {
+            *out++ = inRow[x];
+            *out++ = inRow[x];
+        }
+        inRow  += srcStride[0];
+        outRow += dstStride[0];
+    }
+    return srcSliceH;
+}
+
+/**
+ * Unscaled conversion of one slice between the two 16 bit gray byte orders:
+ * every 16 bit sample of plane 0 is byte-swapped with bswap_16().
+ *
+ * @param c          context; srcW is read
+ * @param src        source planes; only src[0] is read
+ * @param srcStride  byte stride of the source plane
+ * @param srcSliceY  first destination row of the slice
+ * @param srcSliceH  number of rows in the slice
+ * @param dst        destination planes; only dst[0] is written
+ * @param dstStride  byte stride of the destination plane
+ * @return srcSliceH
+ */
+static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                      int srcSliceH, uint8_t* dst[], int dstStride[]){
+
+    int length= c->srcW;
+    int y=      srcSliceY;
+    int height= srcSliceH;
+    int i, j;
+    uint16_t *srcPtr= (uint16_t*)src[0];
+    /* dst[0] is a byte pointer, so the row offset is added in bytes before
+     * the cast; halving it here would address row y/2 instead of row y. */
+    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y);
+    for (i=0; i<height; i++)
+    {
+        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
+        /* the pointers step in 16 bit units, hence stride/2 */
+        srcPtr+= srcStride[0]/2;
+        dstPtr+= dstStride[0]/2;
+    }
+    return srcSliceH;
+}
+
+
+/**
+ * Writes the horizontal and vertical chroma subsampling shifts used for
+ * the given pixel format; formats not listed get 0 for both.
+ * @param h      receives the horizontal shift
+ * @param v      receives the vertical shift
+ * @param format pixel format to look up
+ */
+static void getSubSampleFactors(int *h, int *v, int format){
+    int hShift = 0;
+    int vShift = 0;
+
+    switch(format){
+    case PIX_FMT_UYVY422:
+    case PIX_FMT_YUYV422:
+    case PIX_FMT_YUV422P:
+        hShift = 1;
+        break;
+    case PIX_FMT_YUV420P:
+    case PIX_FMT_YUVA420P:
+    case PIX_FMT_GRAY16BE:
+    case PIX_FMT_GRAY16LE:
+    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
+    case PIX_FMT_NV12:
+    case PIX_FMT_NV21:
+        hShift = 1;
+        vShift = 1;
+        break;
+    case PIX_FMT_YUV440P:
+        vShift = 1;
+        break;
+    case PIX_FMT_YUV410P:
+        hShift = 2;
+        vShift = 2;
+        break;
+    case PIX_FMT_YUV411P:
+        hShift = 2;
+        break;
+    case PIX_FMT_YUV444P:
+    default:
+        break;
+    }
+
+    *h = hShift;
+    *v = vShift;
+}
+
+/**
+ * Rounds a value with 16 fractional bits to the nearest integer and clamps
+ * it: results below -0x7FFF give 0x8000, results above 0x7FFF give 0x7FFF,
+ * anything else is returned converted to uint16_t.
+ */
+static uint16_t roundToInt16(int64_t f){
+    const int rounded = (f + (1<<15))>>16;
+
+    if (rounded < -0x7FFF)
+        return 0x8000;
+    if (rounded >  0x7FFF)
+        return 0x7FFF;
+    return rounded;
+}
+
+/**
+ * Stores the colorspace parameters in the context and, unless the
+ * destination format is YUV or gray, derives the YUV->RGB coefficients
+ * from them.
+ *
+ * @param c          context to update
+ * @param inv_table  the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x];
+ *                   copied into c->srcColorspaceTable
+ * @param srcRange   if 0 the luma gain is scaled by 255/219 and a luma offset
+ *                   of 16 is used; otherwise the four chroma coefficients are
+ *                   scaled by 224/255
+ * @param table      copied into c->dstColorspaceTable, not otherwise used here
+ * @param dstRange   stored in c->dstRange, not otherwise used here
+ * @param brightness stored; 256*brightness is subtracted from the luma offset
+ * @param contrast   stored; multiplies all coefficients, 1<<16 leaves them
+ *                   unchanged
+ * @param saturation stored; multiplies the chroma coefficients, 1<<16 leaves
+ *                   them unchanged
+ * @return 0 on every path of this implementation.
+ *         NOTE(review): the early exit for YUV/gray destinations also returns
+ *         0 rather than -1 -- confirm what callers expect.
+ */
+int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
+    int64_t crv =  inv_table[0];
+    int64_t cbu =  inv_table[1];
+    int64_t cgu = -inv_table[2];
+    int64_t cgv = -inv_table[3];
+    int64_t cy  = 1<<16; // luma gain before range/contrast scaling
+    int64_t oy  = 0;     // luma offset
+
+    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
+    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
+
+    c->brightness= brightness;
+    c->contrast  = contrast;
+    c->saturation= saturation;
+    c->srcRange  = srcRange;
+    c->dstRange  = dstRange;
+    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0; // the parameters are stored, but no coefficients are derived for these outputs
+
+    c->uOffset=   0x0400040004000400LL;
+    c->vOffset=   0x0400040004000400LL;
+
+    if (!srcRange){
+        cy= (cy*255) / 219;
+        oy= 16<<16;
+    }else{
+        crv= (crv*224) / 255;
+        cbu= (cbu*224) / 255;
+        cgu= (cgu*224) / 255;
+        cgv= (cgv*224) / 255;
+    }
+
+    cy = (cy *contrast             )>>16; // >>16 / >>32 drop the scale of contrast and contrast*saturation
+    crv= (crv*contrast * saturation)>>32;
+    cbu= (cbu*contrast * saturation)>>32;
+    cgu= (cgu*contrast * saturation)>>32;
+    cgv= (cgv*contrast * saturation)>>32;
+
+    oy -= 256*brightness;
+
+    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL; // the multiply replicates the 16 bit value into all four words
+    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
+    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
+    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
+    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
+    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
+
+    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13); // same values as above (<<13 == *8192), kept as single int16_t
+    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
+    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
+    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
+    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
+    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
+
+    sws_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
+    //FIXME factorize
+
+#ifdef COMPILE_ALTIVEC
+    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+        sws_yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
+#endif
+    return 0;
+}
+
+/**
+ * Reads back the colorspace parameters stored in the context.
+ *
+ * inv_table and table receive pointers to the tables held inside the
+ * context; the remaining outputs receive copies of the stored values.
+ *
+ * @return -1 if the destination format is YUV or gray (nothing is written
+ *         in that case), 0 otherwise
+ */
+int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
+    const int unsupported = isYUV(c->dstFormat) || isGray(c->dstFormat);
+
+    if (unsupported) {
+        return -1;
+    }
+
+    /* pointers into the context, not copies */
+    *inv_table = c->srcColorspaceTable;
+    *table     = c->dstColorspaceTable;
+
+    *srcRange  = c->srcRange;
+    *dstRange  = c->dstRange;
+
+    *brightness= c->brightness;
+    *contrast  = c->contrast;
+    *saturation= c->saturation;
+
+    return 0;
+}
+
+/**
+ * Replaces a PIX_FMT_YUVJ* format by the PIX_FMT_YUV* format with the same
+ * subsampling suffix.
+ * @param format format to inspect; overwritten when a replacement is made
+ * @return 1 if *format was one of the YUVJ formats and has been replaced,
+ *         0 if it was left untouched
+ */
+static int handle_jpeg(enum PixelFormat *format)
+{
+    enum PixelFormat replacement;
+
+    switch (*format) {
+    case PIX_FMT_YUVJ420P: replacement = PIX_FMT_YUV420P; break;
+    case PIX_FMT_YUVJ422P: replacement = PIX_FMT_YUV422P; break;
+    case PIX_FMT_YUVJ444P: replacement = PIX_FMT_YUV444P; break;
+    case PIX_FMT_YUVJ440P: replacement = PIX_FMT_YUV440P; break;
+    default:
+        return 0;
+    }
+
+    *format = replacement;
+    return 1;
+}
+
+SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
+                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
+
+    SwsContext *c;
+    int i;
+    int usesVFilter, usesHFilter;
+    int unscaled, needsDither;
+    int srcRange, dstRange;
+    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
+#if ARCH_X86
+    if (flags & SWS_CPU_CAPS_MMX)
+        __asm__ volatile("emms\n\t"::: "memory");
+#endif
+
+#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
+    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
+#if   HAVE_MMX2
+    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
+#elif HAVE_AMD3DNOW
+    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
+#elif HAVE_MMX
+    flags |= SWS_CPU_CAPS_MMX;
+#elif HAVE_ALTIVEC
+    flags |= SWS_CPU_CAPS_ALTIVEC;
+#elif ARCH_BFIN
+    flags |= SWS_CPU_CAPS_BFIN;
+#endif
+#endif /* RUNTIME_CPUDETECT */
+    if (clip_table[512] != 255) globalInit();
+    if (!rgb15to16) sws_rgb2rgb_init(flags);
+
+    unscaled = (srcW == dstW && srcH == dstH);
+    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
+        && (fmt_depth(dstFormat))<24
+        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
+
+    srcRange = handle_jpeg(&srcFormat);
+    dstRange = handle_jpeg(&dstFormat);
+
+    if (!isSupportedIn(srcFormat))
+    {
+        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
+        return NULL;
+    }
+    if (!isSupportedOut(dstFormat))
+    {
+        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
+        return NULL;
+    }
+
+    i= flags & ( SWS_POINT
+                |SWS_AREA
+                |SWS_BILINEAR
+                |SWS_FAST_BILINEAR
+                |SWS_BICUBIC
+                |SWS_X
+                |SWS_GAUSS
+                |SWS_LANCZOS
+                |SWS_SINC
+                |SWS_SPLINE
+                |SWS_BICUBLIN);
+    if(!i || (i & (i-1)))
+    {
+        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
+        return NULL;
+    }
+
+    /* sanity check */
+    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
+    {
+        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
+               srcW, srcH, dstW, dstH);
+        return NULL;
+    }
+    if(srcW > VOFW || dstW > VOFW){
+        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
+        return NULL;
+    }
+
+    if (!dstFilter) dstFilter= &dummyFilter;
+    if (!srcFilter) srcFilter= &dummyFilter;
+
+    c= av_mallocz(sizeof(SwsContext));
+
+    c->av_class = &sws_context_class;
+    c->srcW= srcW;
+    c->srcH= srcH;
+    c->dstW= dstW;
+    c->dstH= dstH;
+    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
+    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
+    c->flags= flags;
+    c->dstFormat= dstFormat;
+    c->srcFormat= srcFormat;
+    c->vRounder= 4* 0x0001000100010001ULL;
+
+    usesHFilter= usesVFilter= 0;
+    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
+    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
+    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
+    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
+    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
+    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
+    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
+    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
+
+    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
+    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
+
+    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
+    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
+
+    // drop some chroma lines if the user wants it
+    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
+    c->chrSrcVSubSample+= c->vChrDrop;
+
+    // drop every other pixel for chroma calculation unless user wants full chroma
+    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
+      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
+      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
+      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
+      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
+        c->chrSrcHSubSample=1;
+
+    if (param){
+        c->param[0] = param[0];
+        c->param[1] = param[1];
+    }else{
+        c->param[0] =
+        c->param[1] = SWS_PARAM_DEFAULT;
+    }
+
+    c->chrIntHSubSample= c->chrDstHSubSample;
+    c->chrIntVSubSample= c->chrSrcVSubSample;
+
+    // Note the -((-x)>>y) is so that we always round toward +inf.
+    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
+    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
+    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
+    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
+
+    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
+
+    /* unscaled special cases */
+    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
+    {
+        /* yv12_to_nv12 */
+        if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
+        {
+            c->swScale= PlanarToNV12Wrapper;
+        }
+        /* yuv2bgr */
+        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
+            && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
+        {
+            c->swScale= sws_yuv2rgb_get_func_ptr(c);
+        }
+
+        if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_BITEXACT))
+        {
+            c->swScale= yvu9toyv12Wrapper;
+        }
+
+        /* bgr24toYV12 */
+        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
+            c->swScale= bgr24toyv12Wrapper;
+
+        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
+        if (  (isBGR(srcFormat) || isRGB(srcFormat))
+           && (isBGR(dstFormat) || isRGB(dstFormat))
+           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
+           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
+           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
+           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
+           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
+           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
+           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
+           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
+                                             && dstFormat != PIX_FMT_RGB32_1
+                                             && dstFormat != PIX_FMT_BGR32_1
+           && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
+             c->swScale= rgb2rgbWrapper;
+
+        if ((usePal(srcFormat) && (
+                 dstFormat == PIX_FMT_RGB32   ||
+                 dstFormat == PIX_FMT_RGB32_1 ||
+                 dstFormat == PIX_FMT_RGB24   ||
+                 dstFormat == PIX_FMT_BGR32   ||
+                 dstFormat == PIX_FMT_BGR32_1 ||
+                 dstFormat == PIX_FMT_BGR24)))
+             c->swScale= pal2rgbWrapper;
+
+        if (srcFormat == PIX_FMT_YUV422P)
+        {
+            if (dstFormat == PIX_FMT_YUYV422)
+                c->swScale= YUV422PToYuy2Wrapper;
+            else if (dstFormat == PIX_FMT_UYVY422)
+                c->swScale= YUV422PToUyvyWrapper;
+        }
+
+        /* LQ converters if -sws 0 or -sws 4*/
+        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
+            /* yv12_to_yuy2 */
+            if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P)
+            {
+                if (dstFormat == PIX_FMT_YUYV422)
+                    c->swScale= PlanarToYuy2Wrapper;
+                else if (dstFormat == PIX_FMT_UYVY422)
+                    c->swScale= PlanarToUyvyWrapper;
+            }
+        }
+
+#ifdef COMPILE_ALTIVEC
+        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+            !(c->flags & SWS_BITEXACT) &&
+            srcFormat == PIX_FMT_YUV420P) {
+          // unscaled YV12 -> packed YUV, we want speed
+          if (dstFormat == PIX_FMT_YUYV422)
+              c->swScale= yv12toyuy2_unscaled_altivec;
+          else if (dstFormat == PIX_FMT_UYVY422)
+              c->swScale= yv12touyvy_unscaled_altivec;
+        }
+#endif
+
+        /* simple copy */
+        if (  srcFormat == dstFormat
+            || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
+            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
+            || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
+        {
+            if (isPacked(c->srcFormat))
+                c->swScale= packedCopy;
+            else /* Planar YUV or gray */
+                c->swScale= planarCopy;
+        }
+
+        /* gray16{le,be} conversions */
+        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
+        {
+            c->swScale= gray16togray;
+        }
+        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
+        {
+            c->swScale= graytogray16;
+        }
+        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
+        {
+            c->swScale= gray16swap;
+        }
+
+#if ARCH_BFIN
+        if (flags & SWS_CPU_CAPS_BFIN)
+            ff_bfin_get_unscaled_swscale (c);
+#endif
+
+        if (c->swScale){
+            if (flags&SWS_PRINT_INFO)
+                av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
+                                sws_format_name(srcFormat), sws_format_name(dstFormat));
+            return c;
+        }
+    }
+
+    if (flags & SWS_CPU_CAPS_MMX2)
+    {
+        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
+        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
+        {
+            if (flags&SWS_PRINT_INFO)
+                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
+        }
+        if (usesHFilter) c->canMMX2BeUsed=0;
+    }
+    else
+        c->canMMX2BeUsed=0;
+
+    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
+    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
+
+    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
+    // but only for the FAST_BILINEAR mode otherwise do correct scaling
+    // n-2 is the last chrominance sample available
+    // this is not perfect, but no one should notice the difference, the more correct variant
+    // would be like the vertical one, but that would require some special code for the
+    // first and last pixel
+    if (flags&SWS_FAST_BILINEAR)
+    {
+        if (c->canMMX2BeUsed)
+        {
+            c->lumXInc+= 20;
+            c->chrXInc+= 20;
+        }
+        //we don't use the x86 asm scaler if MMX is available
+        else if (flags & SWS_CPU_CAPS_MMX)
+        {
+            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
+            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
+        }
+    }
+
+    /* precalculate horizontal scaler filter coefficients */
+    {
+        const int filterAlign=
+            (flags & SWS_CPU_CAPS_MMX) ? 4 :
+            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+            1;
+
+        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
+                   srcW      ,       dstW, filterAlign, 1<<14,
+                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                   srcFilter->lumH, dstFilter->lumH, c->param);
+        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
+                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
+                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                   srcFilter->chrH, dstFilter->chrH, c->param);
+
+#define MAX_FUNNY_CODE_SIZE 10000
+#if defined(COMPILE_MMX2)
+// can't downscale !!!
+        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
+        {
+#ifdef MAP_ANONYMOUS
+            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#else
+            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
+            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
+#endif
+
+            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
+            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
+            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
+            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
+
+            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
+            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
+        }
+#endif /* defined(COMPILE_MMX2) */
+    } // initialize horizontal stuff
+
+
+
+    /* precalculate vertical scaler filter coefficients */
+    {
+        const int filterAlign=
+            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
+            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+            1;
+
+        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
+                   srcH      ,        dstH, filterAlign, (1<<12),
+                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                   srcFilter->lumV, dstFilter->lumV, c->param);
+        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
+                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
+                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                   srcFilter->chrV, dstFilter->chrV, c->param);
+
+#if HAVE_ALTIVEC
+        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
+        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
+
+        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
+            int j;
+            short *p = (short *)&c->vYCoeffsBank[i];
+            for (j=0;j<8;j++)
+                p[j] = c->vLumFilter[i];
+        }
+
+        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
+            int j;
+            short *p = (short *)&c->vCCoeffsBank[i];
+            for (j=0;j<8;j++)
+                p[j] = c->vChrFilter[i];
+        }
+#endif
+    }
+
+    // calculate buffer sizes so that they won't run out while handling these damn slices
+    c->vLumBufSize= c->vLumFilterSize;
+    c->vChrBufSize= c->vChrFilterSize;
+    for (i=0; i<dstH; i++)
+    {
+        int chrI= i*c->chrDstH / dstH;
+        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
+                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
+
+        nextSlice>>= c->chrSrcVSubSample;
+        nextSlice<<= c->chrSrcVSubSample;
+        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
+            c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
+        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
+            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
+    }
+
+    // allocate pixbufs dynamically, using the buffer sizes computed above (each row pointer is stored twice)
+    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
+    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
+    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
+    /* align at 16 bytes for AltiVec */
+    for (i=0; i<c->vLumBufSize; i++)
+        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
+    for (i=0; i<c->vChrBufSize; i++)
+        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
+
+    //try to avoid drawing green stuff between the right end and the stride end
+    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
+
+    assert(2*VOFW == VOF);
+
+    assert(c->chrDstH <= dstH);
+
+    if (flags&SWS_PRINT_INFO)
+    {
+#ifdef DITHER1XBPP
+        const char *dither= " dithered";
+#else
+        const char *dither= "";
+#endif
+        if (flags&SWS_FAST_BILINEAR)
+            av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
+        else if (flags&SWS_BILINEAR)
+            av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
+        else if (flags&SWS_BICUBIC)
+            av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
+        else if (flags&SWS_X)
+            av_log(c, AV_LOG_INFO, "Experimental scaler, ");
+        else if (flags&SWS_POINT)
+            av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
+        else if (flags&SWS_AREA)
+            av_log(c, AV_LOG_INFO, "Area Averageing scaler, ");
+        else if (flags&SWS_BICUBLIN)
+            av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
+        else if (flags&SWS_GAUSS)
+            av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
+        else if (flags&SWS_SINC)
+            av_log(c, AV_LOG_INFO, "Sinc scaler, ");
+        else if (flags&SWS_LANCZOS)
+            av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
+        else if (flags&SWS_SPLINE)
+            av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
+        else
+            av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
+
+        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
+            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
+                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
+        else
+            av_log(c, AV_LOG_INFO, "from %s to %s ",
+                   sws_format_name(srcFormat), sws_format_name(dstFormat));
+
+        if (flags & SWS_CPU_CAPS_MMX2)
+            av_log(c, AV_LOG_INFO, "using MMX2\n");
+        else if (flags & SWS_CPU_CAPS_3DNOW)
+            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
+        else if (flags & SWS_CPU_CAPS_MMX)
+            av_log(c, AV_LOG_INFO, "using MMX\n");
+        else if (flags & SWS_CPU_CAPS_ALTIVEC)
+            av_log(c, AV_LOG_INFO, "using AltiVec\n");
+        else
+            av_log(c, AV_LOG_INFO, "using C\n");
+    }
+
+    if (flags & SWS_PRINT_INFO)
+    {
+        if (flags & SWS_CPU_CAPS_MMX)
+        {
+            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
+                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
+            else
+            {
+                if (c->hLumFilterSize==4)
+                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
+                else if (c->hLumFilterSize==8)
+                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal luminance scaling\n");
+                else
+                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal luminance scaling\n");
+
+                if (c->hChrFilterSize==4)
+                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
+                else if (c->hChrFilterSize==8)
+                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
+                else
+                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
+            }
+        }
+        else
+        {
+#if ARCH_X86
+            av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
+#else
+            if (flags & SWS_FAST_BILINEAR)
+                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
+            else
+                av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
+#endif
+        }
+        if (isPlanarYUV(dstFormat))
+        {
+            if (c->vLumFilterSize==1)
+                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            else
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+        }
+        else
+        {
+            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
+                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
+                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
+                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            else
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+        }
+
+        if (dstFormat==PIX_FMT_BGR24)
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
+                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
+        else if (dstFormat==PIX_FMT_RGB32)
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+        else if (dstFormat==PIX_FMT_BGR565)
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+        else if (dstFormat==PIX_FMT_BGR555)
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+
+        av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
+    }
+    if (flags & SWS_PRINT_INFO)
+    {
+        av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
+        av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
+    }
+
+    c->swScale= getSwsFunc(flags);
+    return c;
+}
+
+/**
+ * swscale wrapper, so we don't need to export the SwsContext.
+ * Assumes planar YUV to be in YUV order instead of YVU.
+ */
+int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+              int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int i;
+    uint8_t* src2[4]= {src[0], src[1], src[2]}; // private copy of the plane pointers; the bottom-up path below offsets them
+    // The first call fixes the slice direction, so its slice must touch the top or the bottom of the image.
+    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
+        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
+        return 0; // nothing was scaled
+    }
+    if (c->sliceDir == 0) {
+        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; // 1: top to bottom, -1: bottom to top
+    }
+    // When usePal() holds for the source format, the 256-entry pal_yuv/pal_rgb tables are refilled on every call.
+    if (usePal(c->srcFormat)){
+        for (i=0; i<256; i++){
+            int p, r, g, b,y,u,v;
+            if(c->srcFormat == PIX_FMT_PAL8){
+                p=((uint32_t*)(src[1]))[i]; // for PAL8 plane 1 is read as the palette, one 32-bit word per entry
+                r= (p>>16)&0xFF;
+                g= (p>> 8)&0xFF;
+                b=  p     &0xFF;
+            }else if(c->srcFormat == PIX_FMT_RGB8){ // the remaining formats synthesize the colour from the bits of the index i
+                r= (i>>5    )*36;
+                g= ((i>>2)&7)*36;
+                b= (i&3     )*85;
+            }else if(c->srcFormat == PIX_FMT_BGR8){
+                b= (i>>6    )*85;
+                g= ((i>>3)&7)*36;
+                r= (i&7     )*36;
+            }else if(c->srcFormat == PIX_FMT_RGB4_BYTE){
+                r= (i>>3    )*255;
+                g= ((i>>1)&3)*85;
+                b= (i&1     )*255;
+            }else {
+                assert(c->srcFormat == PIX_FMT_BGR4_BYTE); // the only format left for which this branch is expected
+                b= (i>>3    )*255;
+                g= ((i>>1)&3)*85;
+                r= (i&1     )*255;
+            }
+            y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+            u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+            v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+            c->pal_yuv[i]= y + (u<<8) + (v<<16); // packed with Y in bits 0-7, U in bits 8-15, V in bits 16-23
+
+            // pal_rgb holds the same colour, with component order and byte offset chosen by the destination format
+            switch(c->dstFormat) {
+            case PIX_FMT_BGR32:
+#ifndef WORDS_BIGENDIAN
+            case PIX_FMT_RGB24:
+#endif
+                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
+                break;
+            case PIX_FMT_BGR32_1:
+#ifdef  WORDS_BIGENDIAN
+            case PIX_FMT_BGR24:
+#endif
+                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
+                break;
+            case PIX_FMT_RGB32_1:
+#ifdef  WORDS_BIGENDIAN
+            case PIX_FMT_RGB24:
+#endif
+                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
+                break;
+            case PIX_FMT_RGB32:
+#ifndef WORDS_BIGENDIAN
+            case PIX_FMT_BGR24:
+#endif
+            default: // every other destination format shares the RGB32 layout
+                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
+            }
+        }
+    }
+
+    // copy strides, so they can safely be modified
+    if (c->sliceDir == 1) {
+        // slices go from top to bottom
+        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
+        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
+        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
+    } else {
+        // slices go from bottom to top => we flip the image internally
+        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0], // each plane starts at its last row ...
+                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
+                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
+        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]}; // ... and is walked upwards through negated strides
+        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
+
+        src2[0] += (srcSliceH-1)*srcStride[0]; // point at the last row of the supplied slice
+        if (!usePal(c->srcFormat)) // plane 1 is left untouched for usePal() formats
+            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
+        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
+
+        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); // slice position mirrored vertically
+    }
+}
+
+#if LIBSWSCALE_VERSION_MAJOR < 1 /* compatibility entry point, only compiled while the major version is below 1 */
+int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                      int srcSliceH, uint8_t* dst[], int dstStride[]){
+    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); /* plain forward to sws_scale(), arguments unchanged */
+}
+#endif
+
+/* Builds a filter set from Gaussian blur, sharpen and chroma shift amounts; all four vectors are normalized to sum 1.0.
+ * Returns NULL if the SwsFilter itself cannot be allocated; sws_freeFilter() releases the result. */
+SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
+                                float lumaSharpen, float chromaSharpen,
+                                float chromaHShift, float chromaVShift,
+                                int verbose)
+{
+    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
+    if (!filter) return NULL;
+    if (lumaGBlur!=0.0){
+        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
+        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
+    }else{
+        filter->lumH= sws_getIdentityVec();
+        filter->lumV= sws_getIdentityVec();
+    }
+    if (chromaGBlur!=0.0){
+        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
+        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
+    }else{
+        filter->chrH= sws_getIdentityVec();
+        filter->chrV= sws_getIdentityVec();
+    }
+    // sharpening turns each vector into identity - strength*vector (renormalized further down)
+    if (chromaSharpen!=0.0){
+        SwsVector *id= sws_getIdentityVec();
+        sws_scaleVec(filter->chrH, -chromaSharpen);
+        sws_scaleVec(filter->chrV, -chromaSharpen);
+        sws_addVec(filter->chrH, id);
+        sws_addVec(filter->chrV, id);
+        sws_freeVec(id);
+    }
+
+    if (lumaSharpen!=0.0){
+        SwsVector *id= sws_getIdentityVec();
+        sws_scaleVec(filter->lumH, -lumaSharpen);
+        sws_scaleVec(filter->lumV, -lumaSharpen);
+        sws_addVec(filter->lumH, id);
+        sws_addVec(filter->lumV, id);
+        sws_freeVec(id);
+    }
+    // round to the nearest tap with floor(): a plain (int) cast truncates toward zero and would turn -1.0 into 0
+    if (chromaHShift != 0.0)
+        sws_shiftVec(filter->chrH, (int)floor(chromaHShift+0.5));
+    if (chromaVShift != 0.0)
+        sws_shiftVec(filter->chrV, (int)floor(chromaVShift+0.5));
+
+    sws_normalizeVec(filter->chrH, 1.0);
+    sws_normalizeVec(filter->chrV, 1.0);
+    sws_normalizeVec(filter->lumH, 1.0);
+    sws_normalizeVec(filter->lumV, 1.0);
+
+    if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
+    if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
+
+    return filter;
+}
+
+/* Returns a Gaussian of odd length (about variance*quality taps) normalized to sum 1.0, or NULL if allocation fails. */
+SwsVector *sws_getGaussianVec(double variance, double quality){
+    const int length= (int)(variance*quality + 0.5) | 1; // "| 1" forces an odd length
+    int i;
+    double *coeff= av_malloc(length*sizeof(double));
+    double middle= (length-1)*0.5;
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= length;
+    for (i=0; i<length; i++)
+    {
+        double dist= i-middle;
+        coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
+    }
+
+    sws_normalizeVec(vec, 1.0); // the constant divisor used above cancels out here
+
+    return vec;
+}
+
+/* Allocates a vector of `length` coefficients, all set to c; returns NULL if allocation fails. */
+SwsVector *sws_getConstVec(double c, int length){
+    int i;
+    double *coeff= av_malloc(length*sizeof(double));
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= length;
+    for (i=0; i<length; i++)
+        coeff[i]= c;
+
+    return vec;
+}
+
+
+SwsVector *sws_getIdentityVec(void){ // identity filter: a vector with the single coefficient 1.0
+    return sws_getConstVec(1.0, 1);
+}
+
+/* Returns the plain sum of all coefficients of a. */
+double sws_dcVec(SwsVector *a){
+    const double *p= a->coeff;
+    int left= a->length;
+    double total= 0;
+
+    while (left-- > 0) total+= *p++; // same front-to-back summation order as an index loop
+    return total;
+}
+
+void sws_scaleVec(SwsVector *a, double scalar){ // multiplies every coefficient of a by scalar, in place
+    double *p= a->coeff;
+    int left= a->length;
+
+    while (left-- > 0) *p++ *= scalar;
+}
+
+void sws_normalizeVec(SwsVector *a, double height){ // rescales a in place so that its coefficients sum to height
+    sws_scaleVec(a, height/sws_dcVec(a)); // NOTE: a coefficient sum of zero is not guarded against (division by zero)
+}
+
+/* Returns the convolution of a and b as a new vector of a->length + b->length - 1 taps, or NULL if allocation fails. */
+static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
+    int length= a->length + b->length - 1;
+    double *coeff= av_malloc(length*sizeof(double));
+    int i, j;
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= length;
+    for (i=0; i<length; i++) coeff[i]= 0.0;
+
+    for (i=0; i<a->length; i++)
+    {
+        for (j=0; j<b->length; j++)
+        {
+            coeff[i+j]+= a->coeff[i]*b->coeff[j];
+        }
+    }
+
+    return vec;
+}
+
+/* Returns a new vector holding a + b, both inputs centred in the longer length; NULL if allocation fails. */
+static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
+    int length= FFMAX(a->length, b->length);
+    double *coeff= av_malloc(length*sizeof(double));
+    int i;
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= length;
+    for (i=0; i<length; i++) coeff[i]= 0.0;
+
+    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
+    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
+
+    return vec;
+}
+
+/* Returns a new vector holding a - b, both inputs centred in the longer length; NULL if allocation fails. */
+static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
+    int length= FFMAX(a->length, b->length);
+    double *coeff= av_malloc(length*sizeof(double));
+    int i;
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= length;
+    for (i=0; i<length; i++) coeff[i]= 0.0;
+
+    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
+    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
+
+    return vec;
+}
+
+/* shift left / or right if "shift" is negative; the new vector is 2*|shift| taps longer than a, NULL if allocation fails */
+static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
+    int length= a->length + FFABS(shift)*2;
+    double *coeff= av_malloc(length*sizeof(double));
+    int i;
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= length;
+
+    for (i=0; i<length; i++) coeff[i]= 0.0;
+
+    for (i=0; i<a->length; i++)
+    {
+        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
+    }
+
+    return vec;
+}
+
+void sws_shiftVec(SwsVector *a, int shift){ // in place: a takes over the contents of its shifted copy
+    SwsVector *tmp= sws_getShiftedVec(a, shift);
+    double *stale= a->coeff;
+    *a= *tmp; // struct copy hands both coeff and length over to a
+    av_free(tmp);
+    av_free(stale);
+}
+
+void sws_addVec(SwsVector *a, SwsVector *b){ // in place: a becomes the centre-aligned sum a + b
+    SwsVector *tmp= sws_sumVec(a, b);
+    double *stale= a->coeff;
+    *a= *tmp; // struct copy hands both coeff and length over to a
+    av_free(tmp);
+    av_free(stale);
+}
+
+void sws_subVec(SwsVector *a, SwsVector *b){ // in place: a becomes the centre-aligned difference a - b
+    SwsVector *tmp= sws_diffVec(a, b);
+    double *stale= a->coeff;
+    *a= *tmp; // struct copy hands both coeff and length over to a
+    av_free(tmp);
+    av_free(stale);
+}
+
+void sws_convVec(SwsVector *a, SwsVector *b){ // in place: a becomes the convolution of a and b
+    SwsVector *tmp= sws_getConvVec(a, b);
+    double *stale= a->coeff;
+    *a= *tmp; // struct copy hands both coeff and length over to a
+    av_free(tmp);
+    av_free(stale);
+}
+
+/* Returns a copy of a that owns its own coefficient array, or NULL if allocation fails. */
+SwsVector *sws_cloneVec(SwsVector *a){
+    double *coeff= av_malloc(a->length*sizeof(double));
+    int i;
+    SwsVector *vec= av_malloc(sizeof(SwsVector));
+    if (!coeff || !vec) { av_free(coeff); av_free(vec); return NULL; } // av_free(NULL) is a no-op
+    vec->coeff= coeff;
+    vec->length= a->length;
+    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];
+
+    return vec;
+}
+
+/* Logs one line per coefficient: its value, then 0..60 spaces placing a '|' according to the value's position in the range. */
+void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
+    int i;
+    double max=0;
+    double min=0; // both extremes start at 0, so the plotted range always includes zero
+    double range;
+    for (i=0; i<a->length; i++)
+    {
+        if (a->coeff[i]>max) max= a->coeff[i];
+        if (a->coeff[i]<min) min= a->coeff[i];
+    }
+    range= max - min;
+
+    for (i=0; i<a->length; i++)
+    {
+        // an all-zero vector has range 0; dividing by it would hand NaN to the int conversion
+        int x= range>0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
+        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
+        for (;x>0; x--) av_log(log_ctx, log_level, " ");
+        av_log(log_ctx, log_level, "|\n");
+    }
+}
+
+#if LIBSWSCALE_VERSION_MAJOR < 1 /* compatibility entry point, only compiled while the major version is below 1 */
+void sws_printVec(SwsVector *a){
+    sws_printVec2(a, NULL, AV_LOG_DEBUG); /* no log context, fixed AV_LOG_DEBUG level */
+}
+#endif
+
+void sws_freeVec(SwsVector *a){ // frees the coefficient array and then the vector itself
+    if (!a) return; // a NULL vector is accepted and ignored
+    av_freep(&a->coeff);
+    a->length=0;
+    av_free(a);
+}
+
+/* Frees a filter together with its four vectors; a NULL filter is ignored. */
+void sws_freeFilter(SwsFilter *filter){
+    if (!filter) return;
+    sws_freeVec(filter->lumH); // sws_freeVec() ignores NULL itself, so no per-vector test is needed
+    sws_freeVec(filter->lumV);
+    sws_freeVec(filter->chrH);
+    sws_freeVec(filter->chrV);
+    av_free(filter);
+}
+
+
+void sws_freeContext(SwsContext *c){ // releases every buffer referenced by the context, then the context itself
+    int i;
+    if (!c) return; // a NULL context is accepted and ignored
+
+    if (c->lumPixBuf)
+    {
+        for (i=0; i<c->vLumBufSize; i++) // only the first vLumBufSize row pointers are freed here
+            av_freep(&c->lumPixBuf[i]);
+        av_freep(&c->lumPixBuf);
+    }
+
+    if (c->chrPixBuf)
+    {
+        for (i=0; i<c->vChrBufSize; i++) // same scheme as for the luma rows above
+            av_freep(&c->chrPixBuf[i]);
+        av_freep(&c->chrPixBuf);
+    }
+
+    av_freep(&c->vLumFilter); // filter coefficient tables
+    av_freep(&c->vChrFilter);
+    av_freep(&c->hLumFilter);
+    av_freep(&c->hChrFilter);
+#if HAVE_ALTIVEC
+    av_freep(&c->vYCoeffsBank);
+    av_freep(&c->vCCoeffsBank);
+#endif
+
+    av_freep(&c->vLumFilterPos); // filter position tables
+    av_freep(&c->vChrFilterPos);
+    av_freep(&c->hLumFilterPos);
+    av_freep(&c->hChrFilterPos);
+
+#if ARCH_X86 && CONFIG_GPL
+#ifdef MAP_ANONYMOUS
+    if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE); // unmapped with the fixed MAX_FUNNY_CODE_SIZE length
+    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
+#else
+    av_free(c->funnyYCode);
+    av_free(c->funnyUVCode);
+#endif
+    c->funnyYCode=NULL;
+    c->funnyUVCode=NULL;
+#endif /* ARCH_X86 && CONFIG_GPL */
+
+    av_freep(&c->lumMmx2Filter);
+    av_freep(&c->chrMmx2Filter);
+    av_freep(&c->lumMmx2FilterPos);
+    av_freep(&c->chrMmx2FilterPos);
+    av_freep(&c->yuvTable);
+
+    av_free(c); // the context must not be used after this point
+}
+
+struct SwsContext *sws_getCachedContext(struct SwsContext *context,
+                                        int srcW, int srcH, enum PixelFormat srcFormat,
+                                        int dstW, int dstH, enum PixelFormat dstFormat, int flags,
+                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
+{
+    static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};
+    int reusable;
+
+    // a NULL param is compared (and forwarded) as the default parameter pair
+    if (!param)
+        param = default_param;
+
+    // the cached context is kept only if every creation parameter checked here is unchanged;
+    // srcFilter and dstFilter are not part of the comparison
+    reusable = context &&
+               context->srcW == srcW && context->srcH == srcH &&
+               context->srcFormat == srcFormat &&
+               context->dstW == dstW && context->dstH == dstH &&
+               context->dstFormat == dstFormat && context->flags == flags &&
+               context->param[0] == param[0] && context->param[1] == param[1];
+    if (reusable)
+        return context;
+
+    sws_freeContext(context); // stale or NULL; sws_freeContext() returns at once for NULL
+    return sws_getContext(srcW, srcH, srcFormat,
+                          dstW, dstH, dstFormat, flags,
+                          srcFilter, dstFilter, param);
+}
+
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
new file mode 100644
index 0000000000..6efd90fcda
--- /dev/null
+++ b/libswscale/swscale.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWSCALE_SWSCALE_H
+#define SWSCALE_SWSCALE_H
+
+/**
+ * @file libswscale/swscale.h
+ * @brief
+ *     external api for the swscale stuff
+ */
+
+#include "libavutil/avutil.h"
+
+#define LIBSWSCALE_VERSION_MAJOR 0
+#define LIBSWSCALE_VERSION_MINOR 7
+#define LIBSWSCALE_VERSION_MICRO 1
+
+#define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
+                                               LIBSWSCALE_VERSION_MINOR, \
+                                               LIBSWSCALE_VERSION_MICRO)
+#define LIBSWSCALE_VERSION      AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \
+                                           LIBSWSCALE_VERSION_MINOR, \
+                                           LIBSWSCALE_VERSION_MICRO)
+#define LIBSWSCALE_BUILD        LIBSWSCALE_VERSION_INT
+
+#define LIBSWSCALE_IDENT        "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION)
+
+/**
+ * Returns the LIBSWSCALE_VERSION_INT constant.
+ */
+unsigned swscale_version(void);
+
+/* values for the flags, the stuff on the command line is different */
+#define SWS_FAST_BILINEAR     1
+#define SWS_BILINEAR          2
+#define SWS_BICUBIC           4
+#define SWS_X                 8
+#define SWS_POINT          0x10
+#define SWS_AREA           0x20
+#define SWS_BICUBLIN       0x40
+#define SWS_GAUSS          0x80
+#define SWS_SINC          0x100
+#define SWS_LANCZOS       0x200
+#define SWS_SPLINE        0x400
+
+#define SWS_SRC_V_CHR_DROP_MASK     0x30000
+#define SWS_SRC_V_CHR_DROP_SHIFT    16
+
+#define SWS_PARAM_DEFAULT           123456
+
+#define SWS_PRINT_INFO              0x1000
+
+//the following 3 flags are not completely implemented
+//internal chrominance subsampling info
+#define SWS_FULL_CHR_H_INT    0x2000
+//input subsampling info
+#define SWS_FULL_CHR_H_INP    0x4000
+#define SWS_DIRECT_BGR        0x8000
+#define SWS_ACCURATE_RND      0x40000
+#define SWS_BITEXACT          0x80000
+
+#define SWS_CPU_CAPS_MMX      0x80000000
+#define SWS_CPU_CAPS_MMX2     0x20000000
+#define SWS_CPU_CAPS_3DNOW    0x40000000
+#define SWS_CPU_CAPS_ALTIVEC  0x10000000
+#define SWS_CPU_CAPS_BFIN     0x01000000
+
+#define SWS_MAX_REDUCE_CUTOFF 0.002
+
+#define SWS_CS_ITU709         1
+#define SWS_CS_FCC            4
+#define SWS_CS_ITU601         5
+#define SWS_CS_ITU624         5
+#define SWS_CS_SMPTE170M      5
+#define SWS_CS_SMPTE240M      7
+#define SWS_CS_DEFAULT        5
+
+
+
+// when used for filters they must have an odd number of elements
+// coeffs cannot be shared between vectors
+typedef struct {
+    double *coeff;              ///< pointer to the list of coefficients
+    int length;                 ///< number of coefficients in the vector
+} SwsVector;
+
+// vectors can be shared
+typedef struct {
+    SwsVector *lumH;
+    SwsVector *lumV;
+    SwsVector *chrH;
+    SwsVector *chrV;
+} SwsFilter;
+
+struct SwsContext;
+
+void sws_freeContext(struct SwsContext *swsContext);
+
+/**
+ * Allocates and returns a SwsContext. You need it to perform
+ * scaling/conversion operations using sws_scale().
+ *
+ * @param srcW the width of the source image
+ * @param srcH the height of the source image
+ * @param srcFormat the source image format
+ * @param dstW the width of the destination image
+ * @param dstH the height of the destination image
+ * @param dstFormat the destination image format
+ * @param flags specify which algorithm and options to use for rescaling
+ * @return a pointer to an allocated context, or NULL in case of error
+ */
+struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
+                                  SwsFilter *srcFilter, SwsFilter *dstFilter, double *param);
+
+/**
+ * Scales the image slice in \p srcSlice and puts the resulting scaled
+ * slice in the image in \p dst. A slice is a sequence of consecutive
+ * rows in an image.
+ *
+ * @param context   the scaling context previously created with
+ *                  sws_getContext()
+ * @param srcSlice  the array containing the pointers to the planes of
+ *                  the source slice
+ * @param srcStride the array containing the strides for each plane of
+ *                  the source image
+ * @param srcSliceY the position in the source image of the slice to
+ *                  process, that is the number (counted starting from
+ *                  zero) in the image of the first row of the slice
+ * @param srcSliceH the height of the source slice, that is the number
+ *                  of rows in the slice
+ * @param dst       the array containing the pointers to the planes of
+ *                  the destination image
+ * @param dstStride the array containing the strides for each plane of
+ *                  the destination image
+ * @return          the height of the output slice
+ */
+int sws_scale(struct SwsContext *context, uint8_t* srcSlice[], int srcStride[], int srcSliceY,
+              int srcSliceH, uint8_t* dst[], int dstStride[]);
+#if LIBSWSCALE_VERSION_MAJOR < 1
+/**
+ * @deprecated Use sws_scale() instead.
+ */
+int sws_scale_ordered(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
+                      int srcSliceH, uint8_t* dst[], int dstStride[]) attribute_deprecated;
+#endif
+
+
+int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation);
+int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation);
+
+/**
+ * Returns a normalized Gaussian curve used to filter stuff
+ * quality=3 is high quality, lower is lower quality.
+ */
+SwsVector *sws_getGaussianVec(double variance, double quality);
+
+/**
+ * Allocates and returns a vector with \p length coefficients, all
+ * with the same value \p c.
+ */
+SwsVector *sws_getConstVec(double c, int length);
+
+/**
+ * Allocates and returns a vector with just one coefficient, with
+ * value 1.0.
+ */
+SwsVector *sws_getIdentityVec(void);
+
+/**
+ * Scales all the coefficients of \p a by the \p scalar value.
+ */
+void sws_scaleVec(SwsVector *a, double scalar);
+
+/**
+ * Scales all the coefficients of \p a so that their sum equals \p
+ * height.
+ */
+void sws_normalizeVec(SwsVector *a, double height);
+void sws_convVec(SwsVector *a, SwsVector *b);  ///< replaces \p a by the convolution of \p a and \p b
+void sws_addVec(SwsVector *a, SwsVector *b);   ///< replaces \p a by the centre-aligned sum \p a + \p b
+void sws_subVec(SwsVector *a, SwsVector *b);   ///< replaces \p a by the centre-aligned difference \p a - \p b
+void sws_shiftVec(SwsVector *a, int shift);    ///< shifts \p a left by \p shift taps (right if negative); \p a grows by 2*|shift| taps
+
+/**
+ * Allocates and returns a clone of the vector \p a, that is a vector
+ * with the same coefficients as \p a.
+ */
+SwsVector *sws_cloneVec(SwsVector *a);
+
+#if LIBSWSCALE_VERSION_MAJOR < 1
+/**
+ * @deprecated Use sws_printVec2() instead.
+ */
+attribute_deprecated void sws_printVec(SwsVector *a);
+#endif
+
+/**
+ * Prints with av_log() a textual representation of the vector \p a
+ * if \p log_level <= av_log_level.
+ */
+void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
+
+void sws_freeVec(SwsVector *a);
+
+SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
+                                float lumaSharpen, float chromaSharpen,
+                                float chromaHShift, float chromaVShift,
+                                int verbose);
+void sws_freeFilter(SwsFilter *filter);
+
+/**
+ * Checks if \p context can be reused, otherwise reallocates a new
+ * one.
+ *
+ * If \p context is NULL, just calls sws_getContext() to get a new
+ * context. Otherwise, checks if the parameters are the ones already
+ * saved in \p context. If that is the case, returns the current
+ * context. Otherwise, frees \p context and gets a new context with
+ * the new parameters.
+ *
+ * Be warned that \p srcFilter and \p dstFilter are not checked, they
+ * are assumed to remain the same.
+ */
+struct SwsContext *sws_getCachedContext(struct SwsContext *context,
+                                        int srcW, int srcH, enum PixelFormat srcFormat,
+                                        int dstW, int dstH, enum PixelFormat dstFormat, int flags,
+                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param);
+
+#endif /* SWSCALE_SWSCALE_H */
diff --git a/libswscale/swscale_altivec_template.c b/libswscale/swscale_altivec_template.c
new file mode 100644
index 0000000000..a008b966e8
--- /dev/null
+++ b/libswscale/swscale_altivec_template.c
@@ -0,0 +1,538 @@
+/*
+ * AltiVec-enhanced yuv2yuvX
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define vzero vec_splat_s32(0)
+
+static inline void
+altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) { /* dest[i] = clip(val[i] >> 19, 0, 255) for i < dstW */
+    register int i;
+    vector unsigned int altivec_vectorShiftInt19 =
+        vec_add(vec_splat_u32(10), vec_splat_u32(9)); /* per-lane shift count 10 + 9 = 19 */
+    if ((unsigned long)dest % 16) {
+        /* badly aligned store, we force store alignment */
+        /* and will handle load misalignment on val w/ vec_perm */
+        vector unsigned char perm1;
+        vector signed int v1;
+        for (i = 0 ; (i < dstW) &&
+            (((unsigned long)dest + i) % 16) ; i++) { /* scalar until dest + i is 16-byte aligned */
+                int t = val[i] >> 19;
+                dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
+        }
+        perm1 = vec_lvsl(i << 2, val);
+        v1 = vec_ld(i << 2, val);
+        for ( ; i < (dstW - 15); i+=16) { /* 16 output bytes per iteration */
+            int offset = i << 2; /* i * 4: byte offset of val[i] for 4-byte ints */
+            vector signed int v2 = vec_ld(offset + 16, val);
+            vector signed int v3 = vec_ld(offset + 32, val);
+            vector signed int v4 = vec_ld(offset + 48, val);
+            vector signed int v5 = vec_ld(offset + 64, val);
+            vector signed int v12 = vec_perm(v1, v2, perm1);
+            vector signed int v23 = vec_perm(v2, v3, perm1);
+            vector signed int v34 = vec_perm(v3, v4, perm1);
+            vector signed int v45 = vec_perm(v4, v5, perm1);
+
+            vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19);
+            vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19);
+            vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19);
+            vector signed int vD = vec_sra(v45, altivec_vectorShiftInt19);
+            vector unsigned short vs1 = vec_packsu(vA, vB);
+            vector unsigned short vs2 = vec_packsu(vC, vD);
+            vector unsigned char vf = vec_packsu(vs1, vs2);
+            vec_st(vf, i, dest);
+            v1 = v5; /* the last load is the first input of the next iteration */
+        }
+    } else { // dest is properly aligned, great
+        for (i = 0; i < (dstW - 15); i+=16) { /* 16 output bytes per iteration */
+            int offset = i << 2; /* i * 4: byte offset of val[i] for 4-byte ints */
+            vector signed int v1 = vec_ld(offset, val);
+            vector signed int v2 = vec_ld(offset + 16, val);
+            vector signed int v3 = vec_ld(offset + 32, val);
+            vector signed int v4 = vec_ld(offset + 48, val);
+            vector signed int v5 = vec_sra(v1, altivec_vectorShiftInt19);
+            vector signed int v6 = vec_sra(v2, altivec_vectorShiftInt19);
+            vector signed int v7 = vec_sra(v3, altivec_vectorShiftInt19);
+            vector signed int v8 = vec_sra(v4, altivec_vectorShiftInt19);
+            vector unsigned short vs1 = vec_packsu(v5, v6);
+            vector unsigned short vs2 = vec_packsu(v7, v8);
+            vector unsigned char vf = vec_packsu(vs1, vs2);
+            vec_st(vf, i, dest);
+        }
+    }
+    for ( ; i < dstW ; i++) { /* scalar tail for whatever the vector loops left over */
+        int t = val[i] >> 19;
+        dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
+    }
+}
+
+static inline void
+yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                      int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                      uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+{ /* vertical filter: out[i] = clip(((1 << 18) + sum_j src[j][i] * filter[j]) >> 19) for Y, then U and V */
+    const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)}; /* rounding bias for the final >> 19 */
+    register int i, j;
+    {
+        int __attribute__ ((aligned (16))) val[dstW]; /* 32-bit luma accumulators */
+
+        for (i = 0; i < (dstW -7); i+=4) { /* vector-fill the bias; the scalar loop below finishes the rest */
+            vec_st(vini, i << 2, val);
+        }
+        for (; i < dstW; i++) {
+            val[i] = (1 << 18);
+        }
+
+        for (j = 0; j < lumFilterSize; j++) { /* one pass over the row per filter tap */
+            vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
+            vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter);
+            vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
+            vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
+
+            perm = vec_lvsl(0, lumSrc[j]);
+            l1 = vec_ld(0, lumSrc[j]);
+
+            for (i = 0; i < (dstW - 7); i+=8) { /* 8 samples per iteration */
+                int offset = i << 2; /* byte offset of val[i] */
+                vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]);
+
+                vector signed int v1 = vec_ld(offset, val);
+                vector signed int v2 = vec_ld(offset + 16, val);
+
+                vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7]
+
+                vector signed int i1 = vec_mule(vLumFilter, ls);
+                vector signed int i2 = vec_mulo(vLumFilter, ls);
+
+                vector signed int vf1 = vec_mergeh(i1, i2);
+                vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j]
+
+                vector signed int vo1 = vec_add(v1, vf1);
+                vector signed int vo2 = vec_add(v2, vf2);
+
+                vec_st(vo1, offset, val);
+                vec_st(vo2, offset + 16, val);
+
+                l1 = l2; /* reuse the second load as the next iteration's first half */
+            }
+            for ( ; i < dstW; i++) { /* scalar tail */
+                val[i] += lumSrc[j][i] * lumFilter[j];
+            }
+        }
+        altivec_packIntArrayToCharArray(val, dest, dstW);
+    }
+    if (uDest != 0) { /* chroma is skipped when uDest is NULL (vDest is not checked) */
+        int  __attribute__ ((aligned (16))) u[chrDstW];
+        int  __attribute__ ((aligned (16))) v[chrDstW];
+
+        for (i = 0; i < (chrDstW -7); i+=4) { /* vector-fill the bias; the scalar loop below finishes the rest */
+            vec_st(vini, i << 2, u);
+            vec_st(vini, i << 2, v);
+        }
+        for (; i < chrDstW; i++) {
+            u[i] = (1 << 18);
+            v[i] = (1 << 18);
+        }
+
+        for (j = 0; j < chrFilterSize; j++) { /* one pass over the row per filter tap */
+            vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter);
+            vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter);
+            vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
+            vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
+
+            perm = vec_lvsl(0, chrSrc[j]);
+            l1 = vec_ld(0, chrSrc[j]);
+            l1_V = vec_ld(2048 << 1, chrSrc[j]); /* V samples start 2048 int16 elements after U in chrSrc[j] */
+
+            for (i = 0; i < (chrDstW - 7); i+=8) { /* 8 U and 8 V samples per iteration */
+                int offset = i << 2; /* byte offset of u[i] / v[i] */
+                vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
+                vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]);
+
+                vector signed int v1 = vec_ld(offset, u);
+                vector signed int v2 = vec_ld(offset + 16, u);
+                vector signed int v1_V = vec_ld(offset, v);
+                vector signed int v2_V = vec_ld(offset + 16, v);
+
+                vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
+                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055]
+
+                vector signed int i1 = vec_mule(vChrFilter, ls);
+                vector signed int i2 = vec_mulo(vChrFilter, ls);
+                vector signed int i1_V = vec_mule(vChrFilter, ls_V);
+                vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
+
+                vector signed int vf1 = vec_mergeh(i1, i2);
+                vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+                vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
+                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i+2048] * chrFilter[j] ... chrSrc[j][i+2055] * chrFilter[j]
+
+                vector signed int vo1 = vec_add(v1, vf1);
+                vector signed int vo2 = vec_add(v2, vf2);
+                vector signed int vo1_V = vec_add(v1_V, vf1_V);
+                vector signed int vo2_V = vec_add(v2_V, vf2_V);
+
+                vec_st(vo1, offset, u);
+                vec_st(vo2, offset + 16, u);
+                vec_st(vo1_V, offset, v);
+                vec_st(vo2_V, offset + 16, v);
+
+                l1 = l2; /* reuse the second loads as the next iteration's first halves */
+                l1_V = l2_V;
+            }
+            for ( ; i < chrDstW; i++) { /* scalar tail */
+                u[i] += chrSrc[j][i] * chrFilter[j];
+                v[i] += chrSrc[j][i + 2048] * chrFilter[j];
+            }
+        }
+        altivec_packIntArrayToCharArray(u, uDest, chrDstW);
+        altivec_packIntArrayToCharArray(v, vDest, chrDstW);
+    }
+}
+
+static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, int16_t *filter, int16_t *filterPos, int filterSize) { /* dst[i] = min((sum_j src[filterPos[i]+j] * filter[filterSize*i+j]) >> 7, 32767) */
+    register int i;
+    int __attribute__ ((aligned (16))) tempo[4]; /* aligned scratch used to read back the vec_sums() total */
+
+    if (filterSize % 4) { /* sizes that are not a multiple of 4 use plain C */
+        for (i=0; i<dstW; i++) {
+            register int j;
+            register int srcPos = filterPos[i];
+            register int val = 0;
+            for (j=0; j<filterSize; j++) {
+                val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+            }
+            dst[i] = FFMIN(val>>7, (1<<15)-1);
+        }
+    }
+    else
+    switch (filterSize) {
+    case 4:
+    {
+    for (i=0; i<dstW; i++) {
+        register int srcPos = filterPos[i];
+
+        vector unsigned char src_v0 = vec_ld(srcPos, src);
+        vector unsigned char src_v1 = src_v0, src_vF; /* never hand an indeterminate vector to vec_perm */
+        vector signed short src_v, filter_v;
+        vector signed int val_vEven, val_s;
+        if ((((unsigned long)src + srcPos)% 16) > 12) { /* unsigned: an (int) cast can go negative and make % 16 <= 0 */
+            src_v1 = vec_ld(srcPos + 16, src);
+        }
+        src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
+
+        src_v = // vec_unpackh sign-extends...
+            (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
+        // now put our elements in the even slots
+        src_v = vec_mergeh(src_v, (vector signed short)vzero);
+
+        filter_v = vec_ld(i << 3, filter);
+        // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
+
+        // The neat trick: We only care for half the elements,
+        // high or low depending on (i<<3)%16 (it's 0 or 8 here),
+        // and we're going to use vec_mule, so we choose
+        // carefully how to "unpack" the elements into the even slots.
+        if ((i << 3) % 16)
+            filter_v = vec_mergel(filter_v, (vector signed short)vzero);
+        else
+            filter_v = vec_mergeh(filter_v, (vector signed short)vzero);
+
+        val_vEven = vec_mule(src_v, filter_v);
+        val_s = vec_sums(val_vEven, vzero);
+        vec_st(val_s, 0, tempo);
+        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
+    }
+    }
+    break;
+
+    case 8:
+    {
+    for (i=0; i<dstW; i++) {
+        register int srcPos = filterPos[i];
+
+        vector unsigned char src_v0 = vec_ld(srcPos, src);
+        vector unsigned char src_v1 = src_v0, src_vF; /* never hand an indeterminate vector to vec_perm */
+        vector signed short src_v, filter_v;
+        vector signed int val_v, val_s;
+        if ((((unsigned long)src + srcPos)% 16) > 8) { /* unsigned: an (int) cast can go negative and make % 16 <= 0 */
+            src_v1 = vec_ld(srcPos + 16, src);
+        }
+        src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
+
+        src_v = // vec_unpackh sign-extends...
+            (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
+        filter_v = vec_ld(i << 4, filter);
+        // the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2)
+
+        val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
+        val_s = vec_sums(val_v, vzero);
+        vec_st(val_s, 0, tempo);
+        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
+    }
+    }
+    break;
+
+    case 16:
+    {
+        for (i=0; i<dstW; i++) {
+            register int srcPos = filterPos[i];
+
+            vector unsigned char src_v0 = vec_ld(srcPos, src);
+            vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
+            vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
+
+            vector signed short src_vA = // vec_unpackh sign-extends...
+                (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
+            vector signed short src_vB = // vec_unpackh sign-extends...
+                (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
+
+            vector signed short filter_v0 = vec_ld(i << 5, filter);
+            vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
+            // the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2)
+
+            vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
+            vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
+
+            vector signed int val_s = vec_sums(val_v, vzero);
+
+            vec_st(val_s, 0, tempo);
+            dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
+        }
+    }
+    break;
+
+    default:
+    {
+    for (i=0; i<dstW; i++) {
+        register int j;
+        register int srcPos = filterPos[i];
+
+        vector signed int val_s, val_v = (vector signed int)vzero;
+        vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
+        vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter);
+
+        vector unsigned char src_v0 = vec_ld(srcPos, src);
+        vector unsigned char permS = vec_lvsl(srcPos, src);
+
+        for (j = 0 ; j < filterSize - 15; j += 16) { /* 16 taps per iteration */
+            vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
+            vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
+
+            vector signed short src_vA = // vec_unpackh sign-extends...
+                (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
+            vector signed short src_vB = // vec_unpackh sign-extends...
+                (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
+
+            vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
+            vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
+            vector signed short filter_v0  = vec_perm(filter_v0R, filter_v1R, permF);
+            vector signed short filter_v1  = vec_perm(filter_v1R, filter_v2R, permF);
+
+            vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
+            val_v = vec_msums(src_vB, filter_v1, val_acc);
+
+            filter_v0R = filter_v2R;
+            src_v0 = src_v1;
+        }
+
+        if (j < filterSize-7) { /* one remaining group of 8 taps */
+            // loading src_v0 is useless, it's already done above
+            //vector unsigned char src_v0 = vec_ld(srcPos + j, src);
+            vector unsigned char src_v1 = src_v0, src_vF; /* never hand an indeterminate vector to vec_perm */
+            vector signed short src_v, filter_v1R, filter_v;
+            if ((((unsigned long)src + srcPos)% 16) > 8) { /* unsigned: an (int) cast can go negative and make % 16 <= 0 */
+                src_v1 = vec_ld(srcPos + j + 16, src);
+            }
+            src_vF = vec_perm(src_v0, src_v1, permS);
+
+            src_v = // vec_unpackh sign-extends...
+                (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
+            // loading filter_v0R is useless, it's already done above
+            //vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
+            filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
+            filter_v = vec_perm(filter_v0R, filter_v1R, permF);
+
+            val_v = vec_msums(src_v, filter_v, val_v);
+        }
+
+        val_s = vec_sums(val_v, vzero);
+
+        vec_st(val_s, 0, tempo);
+        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
+    }
+
+    }
+    }
+}
+
+static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                              int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) {
+    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
+    // AltiVec counterpart of yv12toyuy2(); the scalar routine is the fallback when width is not a multiple of 16.
+    uint8_t *ysrc = src[0];
+    uint8_t *usrc = src[1];
+    uint8_t *vsrc = src[2];
+    const int width = c->srcW;
+    const int height = srcSliceH;
+    const int lumStride = srcStride[0];
+    const int chromStride = srcStride[1];
+    const int dstStride = dstStride_a[0];
+    const vector unsigned char yperm = vec_lvsl(0, ysrc); /* NOTE(review): computed once while ysrc moves by lumStride per row -- confirm lumStride % 16 == 0 */
+    const int vertLumPerChroma = 2; /* two luma rows share one chroma row */
+    register unsigned int y;
+
+    if (width&15) {
+        yv12toyuy2(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
+        return srcSliceH;
+    }
+
+    /* This code assumes:
+
+    1) dst is 16 bytes-aligned
+    2) dstStride is a multiple of 16
+    3) width is a multiple of 16
+    4) lum & chrom stride are multiples of 8
+    */
+
+    for (y=0; y<height; y++) {
+        int i;
+        for (i = 0; i < width - 31; i+= 32) { /* 32 pixels in, 64 bytes out per iteration */
+            const unsigned int j = i >> 1; /* chroma offset: one U/V sample per two luma samples */
+            vector unsigned char v_yA = vec_ld(i, ysrc);
+            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
+            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
+            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
+            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
+            vector unsigned char v_uA = vec_ld(j, usrc);
+            vector unsigned char v_uB = vec_ld(j + 16, usrc);
+            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
+            vector unsigned char v_vA = vec_ld(j, vsrc);
+            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
+            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
+            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); /* interleave as Y U Y V ... */
+            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
+            vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
+            vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
+            vec_st(v_yuy2_0, (i << 1), dst);
+            vec_st(v_yuy2_1, (i << 1) + 16, dst);
+            vec_st(v_yuy2_2, (i << 1) + 32, dst);
+            vec_st(v_yuy2_3, (i << 1) + 48, dst);
+        }
+        if (i < width) { /* 16 pixels left; loads here are not realigned with vec_perm */
+            const unsigned int j = i >> 1;
+            vector unsigned char v_y1 = vec_ld(i, ysrc);
+            vector unsigned char v_u = vec_ld(j, usrc);
+            vector unsigned char v_v = vec_ld(j, vsrc);
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
+            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
+            vec_st(v_yuy2_0, (i << 1), dst);
+            vec_st(v_yuy2_1, (i << 1) + 16, dst);
+        }
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { /* advance chroma after every second luma row */
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst += dstStride;
+    }
+
+    return srcSliceH;
+}
+
+static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                              int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) {
+    uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
+    // AltiVec counterpart of yv12touyvy(); the scalar routine is the fallback when width is not a multiple of 16.
+    uint8_t *ysrc = src[0];
+    uint8_t *usrc = src[1];
+    uint8_t *vsrc = src[2];
+    const int width = c->srcW;
+    const int height = srcSliceH;
+    const int lumStride = srcStride[0];
+    const int chromStride = srcStride[1];
+    const int dstStride = dstStride_a[0];
+    const int vertLumPerChroma = 2; /* two luma rows share one chroma row */
+    const vector unsigned char yperm = vec_lvsl(0, ysrc); /* NOTE(review): computed once while ysrc moves by lumStride per row -- confirm lumStride % 16 == 0 */
+    register unsigned int y;
+
+    if (width&15) {
+        yv12touyvy(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
+        return srcSliceH;
+    }
+
+    /* This code assumes:
+
+    1) dst is 16 bytes-aligned
+    2) dstStride is a multiple of 16
+    3) width is a multiple of 16
+    4) lum & chrom stride are multiples of 8
+    */
+
+    for (y=0; y<height; y++) {
+        int i;
+        for (i = 0; i < width - 31; i+= 32) { /* 32 pixels in, 64 bytes out per iteration */
+            const unsigned int j = i >> 1; /* chroma offset: one U/V sample per two luma samples */
+            vector unsigned char v_yA = vec_ld(i, ysrc);
+            vector unsigned char v_yB = vec_ld(i + 16, ysrc);
+            vector unsigned char v_yC = vec_ld(i + 32, ysrc);
+            vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
+            vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
+            vector unsigned char v_uA = vec_ld(j, usrc);
+            vector unsigned char v_uB = vec_ld(j + 16, usrc);
+            vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
+            vector unsigned char v_vA = vec_ld(j, vsrc);
+            vector unsigned char v_vB = vec_ld(j + 16, vsrc);
+            vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
+            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); /* interleave as U Y V Y ... */
+            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
+            vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
+            vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
+            vec_st(v_uyvy_0, (i << 1), dst);
+            vec_st(v_uyvy_1, (i << 1) + 16, dst);
+            vec_st(v_uyvy_2, (i << 1) + 32, dst);
+            vec_st(v_uyvy_3, (i << 1) + 48, dst);
+        }
+        if (i < width) { /* 16 pixels left; loads here are not realigned with vec_perm */
+            const unsigned int j = i >> 1;
+            vector unsigned char v_y1 = vec_ld(i, ysrc);
+            vector unsigned char v_u = vec_ld(j, usrc);
+            vector unsigned char v_v = vec_ld(j, vsrc);
+            vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
+            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
+            vec_st(v_uyvy_0, (i << 1), dst);
+            vec_st(v_uyvy_1, (i << 1) + 16, dst);
+        }
+        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { /* advance chroma after every second luma row */
+            usrc += chromStride;
+            vsrc += chromStride;
+        }
+        ysrc += lumStride;
+        dst += dstStride;
+    }
+    return srcSliceH;
+}
diff --git a/libswscale/swscale_avoption.c b/libswscale/swscale_avoption.c
new file mode 100644
index 0000000000..996843df1d
--- /dev/null
+++ b/libswscale/swscale_avoption.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avutil.h"
+#include "libavcodec/opt.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+static const char * sws_context_to_name(void * ptr) { /* name callback stored in sws_context_class; ptr is unused */
+    return "swscaler";
+}
+
+#define OFFSET(x) offsetof(SwsContext, x) /* byte offset of field x inside SwsContext */
+#define DEFAULT 0 /* default value used for the sws_flags option */
+#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) /* parenthesized so the | cannot rebind inside a larger expression */
+
+static const AVOption options[] = { /* option table referenced by sws_context_class */
+    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, VE, "sws_flags" }, /* the only entry bound to a SwsContext field; the rows below are named constants of the same "sws_flags" unit */
+    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, SWS_FAST_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, SWS_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, SWS_BICUBIC, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, SWS_X, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, SWS_POINT, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, SWS_AREA, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, SWS_BICUBLIN, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, SWS_GAUSS, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, SWS_SINC, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, SWS_LANCZOS, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX, VE, "sws_flags" }, /* NOTE(review): empty help string */
+    { NULL } /* NULL-name entry closes the table */
+};
+
+const AVClass sws_context_class = { "SWScaler", sws_context_to_name, options }; /* positional init: class name, name callback, option table */
diff --git a/libswscale/swscale_bfin.c b/libswscale/swscale_bfin.c
new file mode 100644
index 0000000000..ed7d9579b6
--- /dev/null
+++ b/libswscale/swscale_bfin.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
+ *
+ * Blackfin software video scaler operations
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+#include "config.h"
+#include <unistd.h>
+#include "rgb2rgb.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+#ifdef __FDPIC__
+#define L1CODE __attribute__ ((l1_text))
+#else
+#define L1CODE
+#endif
+
+int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                        long width, long height,
+                        long lumStride, long chromStride, long srcStride) L1CODE; /* used for UYVY422 -> YUV420P; only declared in this file */
+
+int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                        long width, long height,
+                        long lumStride, long chromStride, long srcStride) L1CODE; /* used for YUYV422 -> YUV420P; only declared in this file */
+
+static int uyvytoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    /* Move every plane pointer to the first row of this slice (chroma rows are halved). */
+    const uint8_t *in = src[0] + srcStride[0]*srcSliceY;
+    uint8_t *outY     = dst[0] + dstStride[0]*srcSliceY;
+    uint8_t *outU     = dst[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *outV     = dst[2] + dstStride[2]*srcSliceY/2;
+
+    /* NOTE(review): the luma stride is also passed as the width argument -- confirm intent. */
+    ff_bfin_uyvytoyv12 (in, outY, outU, outV, dstStride[0], srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+    return srcSliceH; /* number of rows handled */
+}
+
+static int yuyvtoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    /* Move every plane pointer to the first row of this slice (chroma rows are halved). */
+    const uint8_t *in = src[0] + srcStride[0]*srcSliceY;
+    uint8_t *outY     = dst[0] + dstStride[0]*srcSliceY;
+    uint8_t *outU     = dst[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *outV     = dst[2] + dstStride[2]*srcSliceY/2;
+
+    /* NOTE(review): the luma stride is also passed as the width argument -- confirm intent. */
+    ff_bfin_yuyvtoyv12 (in, outY, outU, outV, dstStride[0], srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+    return srcSliceH; /* number of rows handled */
+}
+
+
+void ff_bfin_get_unscaled_swscale (SwsContext *c)
+{
+    /* Both fast paths need the Blackfin flag and a YUV420P destination; otherwise c->swScale is left alone. */
+    if (!(c->flags & SWS_CPU_CAPS_BFIN) || c->dstFormat != PIX_FMT_YUV420P)
+        return;
+
+    if (c->srcFormat == PIX_FMT_UYVY422) {
+        av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
+        c->swScale = uyvytoyv12_unscaled;
+    }
+    if (c->srcFormat == PIX_FMT_YUYV422) {
+        av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
+        c->swScale = yuyvtoyv12_unscaled;
+    }
+}
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
new file mode 100644
index 0000000000..cdf3754d14
--- /dev/null
+++ b/libswscale/swscale_internal.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWSCALE_SWSCALE_INTERNAL_H
+#define SWSCALE_SWSCALE_INTERNAL_H
+
+#include "config.h"
+
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#include "libavutil/avutil.h"
+
+#define STR(s)         AV_TOSTRING(s) //AV_STRINGIFY is too long; used to paste numeric macros into asm strings
+
+#define MAX_FILTER_SIZE 256 /* lumMmxFilter/chrMmxFilter in SwsContext hold 4*MAX_FILTER_SIZE entries */
+
+#define VOFW 2048 /* presumably VOF expressed in 16-bit samples -- TODO confirm against users */
+#define VOF  (VOFW*2) /* byte displacement the asm adds to a U-data address to read the V data */
+
+#ifdef WORDS_BIGENDIAN
+#define ALT32_CORR (-1)
+#else
+#define ALT32_CORR   1
+#endif
+
+#if ARCH_X86_64 /* byte offsets inside one filter entry as read by the *_ACCURATE asm loops */
+#   define APCK_PTR2 8  /* second source pointer */
+#   define APCK_COEF 16 /* coefficient quadword */
+#   define APCK_SIZE 24 /* distance to the next entry */
+#else
+#   define APCK_PTR2 4  /* second source pointer */
+#   define APCK_COEF 8  /* coefficient quadword */
+#   define APCK_SIZE 16 /* distance to the next entry */
+#endif
+
+struct SwsContext; // forward declaration, needed by the SwsFunc signature below
+
+typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
+             int srcSliceH, uint8_t* dst[], int dstStride[]);
+
+/* This struct should be aligned on at least a 32-byte boundary. */
+typedef struct SwsContext{
+    /**
+     * info on struct for av_log
+     */
+    const AVClass *av_class;
+
+    /**
+     * Note that src, dst, srcStride, dstStride will be copied in the
+     * sws_scale() wrapper so they can be freely modified here.
+     */
+    SwsFunc swScale;
+    int srcW, srcH, dstH;
+    int chrSrcW, chrSrcH, chrDstW, chrDstH;
+    int lumXInc, chrXInc;
+    int lumYInc, chrYInc;
+    enum PixelFormat dstFormat, srcFormat;  ///< format 4:2:0 type is always YV12
+    int origDstFormat, origSrcFormat;       ///< format
+    int chrSrcHSubSample, chrSrcVSubSample;
+    int chrIntHSubSample, chrIntVSubSample;
+    int chrDstHSubSample, chrDstVSubSample;
+    int vChrDrop;
+    int sliceDir;
+    double param[2];
+
+    uint32_t pal_yuv[256];
+    uint32_t pal_rgb[256];
+
+    int16_t **lumPixBuf;
+    int16_t **chrPixBuf;
+    int16_t *hLumFilter;
+    int16_t *hLumFilterPos;
+    int16_t *hChrFilter;
+    int16_t *hChrFilterPos;
+    int16_t *vLumFilter;
+    int16_t *vLumFilterPos;
+    int16_t *vChrFilter;
+    int16_t *vChrFilterPos;
+
+    uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
+
+    int hLumFilterSize;
+    int hChrFilterSize;
+    int vLumFilterSize;
+    int vChrFilterSize;
+    int vLumBufSize;
+    int vChrBufSize;
+
+    uint8_t *funnyYCode;
+    uint8_t *funnyUVCode;
+    int32_t *lumMmx2FilterPos;
+    int32_t *chrMmx2FilterPos;
+    int16_t *lumMmx2Filter;
+    int16_t *chrMmx2Filter;
+
+    int canMMX2BeUsed;
+
+    int lastInLumBuf;
+    int lastInChrBuf;
+    int lumBufIndex;
+    int chrBufIndex;
+    int dstY;
+    int flags;
+    void * yuvTable;            // pointer to the yuv->rgb table start so it can be freed()
+    uint8_t * table_rV[256];
+    uint8_t * table_gU[256];
+    int    table_gV[256];
+    uint8_t * table_bU[256];
+
+    //Colorspace stuff
+    int contrast, brightness, saturation;    // for sws_getColorspaceDetails
+    int srcColorspaceTable[4];
+    int dstColorspaceTable[4];
+    int srcRange, dstRange;
+    int yuv2rgb_y_offset;
+    int yuv2rgb_y_coeff;
+    int yuv2rgb_v2r_coeff;
+    int yuv2rgb_v2g_coeff;
+    int yuv2rgb_u2g_coeff;
+    int yuv2rgb_u2b_coeff;
+
+#define RED_DITHER            "0*8" /* these strings are byte offsets from &redDither, the base operand the asm receives */
+#define GREEN_DITHER          "1*8"
+#define BLUE_DITHER           "2*8"
+#define Y_COEFF               "3*8"
+#define VR_COEFF              "4*8"
+#define UB_COEFF              "5*8"
+#define VG_COEFF              "6*8"
+#define UG_COEFF              "7*8"
+#define Y_OFFSET              "8*8"
+#define U_OFFSET              "9*8"
+#define V_OFFSET              "10*8"
+#define LUM_MMX_FILTER_OFFSET "11*8" /* follows the 11 uint64_t fields redDither..vOffset */
+#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256" /* plus the size of lumMmxFilter (int32_t[4*256]) */
+#define DSTW_OFFSET           "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
+#define ESP_OFFSET            "11*8+4*4*256*2+8"
+#define VROUNDER_OFFSET       "11*8+4*4*256*2+16"
+#define U_TEMP                "11*8+4*4*256*2+24"
+#define V_TEMP                "11*8+4*4*256*2+32"
+
+    uint64_t redDither   __attribute__((aligned(8))); // field order and sizes from here to v_temp must match the offsets above
+    uint64_t greenDither __attribute__((aligned(8)));
+    uint64_t blueDither  __attribute__((aligned(8)));
+
+    uint64_t yCoeff      __attribute__((aligned(8)));
+    uint64_t vrCoeff     __attribute__((aligned(8)));
+    uint64_t ubCoeff     __attribute__((aligned(8)));
+    uint64_t vgCoeff     __attribute__((aligned(8)));
+    uint64_t ugCoeff     __attribute__((aligned(8)));
+    uint64_t yOffset     __attribute__((aligned(8)));
+    uint64_t uOffset     __attribute__((aligned(8)));
+    uint64_t vOffset     __attribute__((aligned(8)));
+    int32_t  lumMmxFilter[4*MAX_FILTER_SIZE]; // at LUM_MMX_FILTER_OFFSET
+    int32_t  chrMmxFilter[4*MAX_FILTER_SIZE]; // at CHR_MMX_FILTER_OFFSET
+    int dstW;                                 // at DSTW_OFFSET
+    uint64_t esp          __attribute__((aligned(8))); // at ESP_OFFSET
+    uint64_t vRounder     __attribute__((aligned(8))); // at VROUNDER_OFFSET
+    uint64_t u_temp       __attribute__((aligned(8))); // at U_TEMP, scratch written by the asm
+    uint64_t v_temp       __attribute__((aligned(8))); // at V_TEMP, scratch written by the asm
+
+#if HAVE_ALTIVEC
+
+  vector signed short   CY;
+  vector signed short   CRV;
+  vector signed short   CBU;
+  vector signed short   CGU;
+  vector signed short   CGV;
+  vector signed short   OY;
+  vector unsigned short CSHIFT;
+  vector signed short   *vYCoeffsBank, *vCCoeffsBank;
+
+#endif
+
+
+#if ARCH_BFIN
+    uint32_t oy           __attribute__((aligned(4)));
+    uint32_t oc           __attribute__((aligned(4)));
+    uint32_t zero         __attribute__((aligned(4)));
+    uint32_t cy           __attribute__((aligned(4)));
+    uint32_t crv          __attribute__((aligned(4)));
+    uint32_t rmask        __attribute__((aligned(4)));
+    uint32_t cbu          __attribute__((aligned(4)));
+    uint32_t bmask        __attribute__((aligned(4)));
+    uint32_t cgu          __attribute__((aligned(4)));
+    uint32_t cgv          __attribute__((aligned(4)));
+    uint32_t gmask        __attribute__((aligned(4)));
+#endif
+
+#if HAVE_VIS
+    uint64_t sparc_coeffs[10] __attribute__((aligned(8)));
+#endif
+
+} SwsContext;
+//FIXME check init (where 0)
+
+SwsFunc sws_yuv2rgb_get_func_ptr (SwsContext *c);
+int sws_yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation);
+
+void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation);
+SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c);
+void altivec_yuv2packedX (SwsContext *c,
+                          int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                          int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                          uint8_t *dest, int dstW, int dstY);
+
+const char *sws_format_name(int format);
+
+//FIXME replace this with something faster (each macro below expands x once per comparison, so x must be side-effect free)
+#define isPlanarYUV(x)  (           /* note: PIX_FMT_YUVA420P is not in this list */\
+           (x)==PIX_FMT_YUV410P     \
+        || (x)==PIX_FMT_YUV420P     \
+        || (x)==PIX_FMT_YUV411P     \
+        || (x)==PIX_FMT_YUV422P     \
+        || (x)==PIX_FMT_YUV444P     \
+        || (x)==PIX_FMT_YUV440P     \
+        || (x)==PIX_FMT_NV12        \
+        || (x)==PIX_FMT_NV21        \
+    )
+#define isYUV(x)        (           /* packed UYVY/YUYV plus everything isPlanarYUV accepts */\
+           (x)==PIX_FMT_UYVY422     \
+        || (x)==PIX_FMT_YUYV422     \
+        || isPlanarYUV(x)           \
+    )
+#define isGray(x)       (           /* 8-bit gray plus the two isGray16 formats */\
+           (x)==PIX_FMT_GRAY8       \
+        || (x)==PIX_FMT_GRAY16BE    \
+        || (x)==PIX_FMT_GRAY16LE    \
+    )
+#define isGray16(x)     (           \
+           (x)==PIX_FMT_GRAY16BE    \
+        || (x)==PIX_FMT_GRAY16LE    \
+    )
+#define isRGB(x)        (           /* MONOBLACK/MONOWHITE match both isRGB and isBGR */\
+           (x)==PIX_FMT_RGB32       \
+        || (x)==PIX_FMT_RGB32_1     \
+        || (x)==PIX_FMT_RGB24       \
+        || (x)==PIX_FMT_RGB565      \
+        || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB8        \
+        || (x)==PIX_FMT_RGB4        \
+        || (x)==PIX_FMT_RGB4_BYTE   \
+        || (x)==PIX_FMT_MONOBLACK   \
+        || (x)==PIX_FMT_MONOWHITE   \
+    )
+#define isBGR(x)        (           /* MONOBLACK/MONOWHITE match both isRGB and isBGR */\
+           (x)==PIX_FMT_BGR32       \
+        || (x)==PIX_FMT_BGR32_1     \
+        || (x)==PIX_FMT_BGR24       \
+        || (x)==PIX_FMT_BGR565      \
+        || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR8        \
+        || (x)==PIX_FMT_BGR4        \
+        || (x)==PIX_FMT_BGR4_BYTE   \
+        || (x)==PIX_FMT_MONOBLACK   \
+        || (x)==PIX_FMT_MONOWHITE   \
+    )
+#define isALPHA(x)      (           /* the 32-bit RGB/BGR formats and YUVA420P */\
+           (x)==PIX_FMT_BGR32       \
+        || (x)==PIX_FMT_BGR32_1     \
+        || (x)==PIX_FMT_RGB32       \
+        || (x)==PIX_FMT_RGB32_1     \
+        || (x)==PIX_FMT_YUVA420P    \
+    )
+
+static inline int fmt_depth(int fmt)
+{
+    /* Depth number for the RGB/BGR, 16-bit gray and mono formats listed; 0 for any other. */
+    int depth = 0;
+    switch(fmt) {
+        case PIX_FMT_BGRA: case PIX_FMT_ABGR:
+        case PIX_FMT_RGBA: case PIX_FMT_ARGB:
+            depth = 32;
+            break;
+        case PIX_FMT_BGR24: case PIX_FMT_RGB24:
+            depth = 24;
+            break;
+        case PIX_FMT_BGR565: case PIX_FMT_RGB565:
+        case PIX_FMT_GRAY16BE: case PIX_FMT_GRAY16LE:
+            depth = 16;
+            break;
+        case PIX_FMT_BGR555: case PIX_FMT_RGB555:
+            depth = 15;
+            break;
+        case PIX_FMT_BGR8: case PIX_FMT_RGB8:
+            depth = 8;
+            break;
+        case PIX_FMT_BGR4: case PIX_FMT_RGB4:
+        case PIX_FMT_BGR4_BYTE: case PIX_FMT_RGB4_BYTE:
+            depth = 4;
+            break;
+        case PIX_FMT_MONOBLACK: case PIX_FMT_MONOWHITE:
+            depth = 1;
+            break;
+        default:
+            break;
+    }
+    return depth;
+}
+
+extern const uint64_t ff_dither4[2];
+extern const uint64_t ff_dither8[2];
+
+extern const AVClass sws_context_class;
+
+#endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
new file mode 100644
index 0000000000..3262b6ee85
--- /dev/null
+++ b/libswscale/swscale_template.c
@@ -0,0 +1,3041 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * The C code (not assembly, MMX, ...) of this file can be used
+ * under the LGPL license.
+ */
+
+#undef REAL_MOVNTQ /* clear earlier definitions; presumably this template is included more than once -- confirm in swscale.c */
+#undef MOVNTQ
+#undef PAVGB
+#undef PREFETCH
+#undef PREFETCHW
+#undef EMMS
+#undef SFENCE
+
+#if HAVE_AMD3DNOW
+/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
+#define EMMS     "femms"
+#else
+#define EMMS     "emms"
+#endif
+
+#if HAVE_AMD3DNOW
+#define PREFETCH  "prefetch"
+#define PREFETCHW "prefetchw"
+#elif HAVE_MMX2
+#define PREFETCH "prefetchnta"
+#define PREFETCHW "prefetcht0"
+#else /* no prefetch instruction: emit only an assembler comment */
+#define PREFETCH  " # nop"
+#define PREFETCHW " # nop"
+#endif
+
+#if HAVE_MMX2
+#define SFENCE "sfence"
+#else /* no sfence: emit only an assembler comment */
+#define SFENCE " # nop"
+#endif
+
+#if HAVE_MMX2
+#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
+#elif HAVE_AMD3DNOW
+#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
+#endif /* PAVGB stays undefined when neither MMX2 nor 3DNow! is available */
+
+#if HAVE_MMX2
+#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
+#else /* plain store when the non-temporal movntq is unavailable */
+#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
+#endif
+#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b) /* extra level so macro arguments are expanded before # stringifies them */
+
+#if HAVE_ALTIVEC
+#include "swscale_altivec_template.c"
+#endif
+
+#define YSCALEYUV2YV12X(x, offset, dest, width) /* vertical filter: 8 output bytes per outer iteration */\
+    __asm__ volatile(\
+    "xor                          %%"REG_a", %%"REG_a"  \n\t" /* output index = 0 */\
+    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t" /* both accumulators start at the rounder */\
+    "movq                             %%mm3, %%mm4      \n\t"\
+    "lea                     " offset "(%0), %%"REG_d"  \n\t" /* first filter entry */\
+    "mov                        (%%"REG_d"), %%"REG_S"  \n\t" /* its source pointer */\
+    ASMALIGN(4) /* FIXME Unroll? */\
+    "1:                                                 \n\t"\
+    "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
+    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
+    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
+    "add                                $16, %%"REG_d"  \n\t" /* next 16-byte filter entry */\
+    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+    "test                         %%"REG_S", %%"REG_S"  \n\t" /* a NULL source pointer ends the tap list */\
+    "pmulhw                           %%mm0, %%mm2      \n\t"\
+    "pmulhw                           %%mm0, %%mm5      \n\t"\
+    "paddw                            %%mm2, %%mm3      \n\t"\
+    "paddw                            %%mm5, %%mm4      \n\t"\
+    " jnz                                1b             \n\t"\
+    "psraw                               $3, %%mm3      \n\t"\
+    "psraw                               $3, %%mm4      \n\t"\
+    "packuswb                         %%mm4, %%mm3      \n\t" /* saturate the 8 words to unsigned bytes */\
+    MOVNTQ(%%mm3, (%1, %%REGa))\
+    "add                                 $8, %%"REG_a"  \n\t"\
+    "cmp                                 %2, %%"REG_a"  \n\t" /* flags survive the reloads below */\
+    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t" /* re-arm state for the next 8 outputs */\
+    "movq                             %%mm3, %%mm4      \n\t"\
+    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
+    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+    "jb                                  1b             \n\t" /* continue while index < width (unsigned) */\
+    :: "r" (&c->redDither), /* %0: base for every *_OFFSET displacement */\
+    "r" (dest), "g" (width)\
+    : "%"REG_a, "%"REG_d, "%"REG_S\
+    );
+
+#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) /* as YSCALEYUV2YV12X, but with 32-bit accumulation, two taps per pass */\
+    __asm__ volatile(\
+    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
+    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
+    "pxor                             %%mm4, %%mm4      \n\t" /* mm4..mm7: four dword accumulator pairs */\
+    "pxor                             %%mm5, %%mm5      \n\t"\
+    "pxor                             %%mm6, %%mm6      \n\t"\
+    "pxor                             %%mm7, %%mm7      \n\t"\
+    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+    ASMALIGN(4) \
+    "1:                                                 \n\t"\
+    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
+    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
+    "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t" /* second source pointer of this entry */\
+    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
+    "movq                             %%mm0, %%mm3      \n\t"\
+    "punpcklwd                        %%mm1, %%mm0      \n\t" /* interleave samples of both lines for pmaddwd */\
+    "punpckhwd                        %%mm1, %%mm3      \n\t"\
+    "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
+    "pmaddwd                          %%mm1, %%mm0      \n\t"\
+    "pmaddwd                          %%mm1, %%mm3      \n\t"\
+    "paddd                            %%mm0, %%mm4      \n\t"\
+    "paddd                            %%mm3, %%mm5      \n\t"\
+    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
+    "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t" /* first pointer of the next entry */\
+    "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
+    "test                         %%"REG_S", %%"REG_S"  \n\t" /* NULL pointer ends the tap list */\
+    "movq                             %%mm2, %%mm0      \n\t"\
+    "punpcklwd                        %%mm3, %%mm2      \n\t"\
+    "punpckhwd                        %%mm3, %%mm0      \n\t"\
+    "pmaddwd                          %%mm1, %%mm2      \n\t"\
+    "pmaddwd                          %%mm1, %%mm0      \n\t"\
+    "paddd                            %%mm2, %%mm6      \n\t"\
+    "paddd                            %%mm0, %%mm7      \n\t"\
+    " jnz                                1b             \n\t"\
+    "psrad                              $16, %%mm4      \n\t"\
+    "psrad                              $16, %%mm5      \n\t"\
+    "psrad                              $16, %%mm6      \n\t"\
+    "psrad                              $16, %%mm7      \n\t"\
+    "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t" /* rounder is added after the sums are narrowed */\
+    "packssdw                         %%mm5, %%mm4      \n\t"\
+    "packssdw                         %%mm7, %%mm6      \n\t"\
+    "paddw                            %%mm0, %%mm4      \n\t"\
+    "paddw                            %%mm0, %%mm6      \n\t"\
+    "psraw                               $3, %%mm4      \n\t"\
+    "psraw                               $3, %%mm6      \n\t"\
+    "packuswb                         %%mm6, %%mm4      \n\t"\
+    MOVNTQ(%%mm4, (%1, %%REGa))\
+    "add                                 $8, %%"REG_a"  \n\t"\
+    "cmp                                 %2, %%"REG_a"  \n\t" /* flags survive the re-initialisation below */\
+    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
+    "pxor                             %%mm4, %%mm4      \n\t"\
+    "pxor                             %%mm5, %%mm5      \n\t"\
+    "pxor                             %%mm6, %%mm6      \n\t"\
+    "pxor                             %%mm7, %%mm7      \n\t"\
+    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+    "jb                                  1b             \n\t" /* continue while index < width (unsigned) */\
+    :: "r" (&c->redDither), /* %0: base for every *_OFFSET displacement */\
+    "r" (dest), "g" (width)\
+    : "%"REG_a, "%"REG_d, "%"REG_S\
+    );
+
+#define YSCALEYUV2YV121 /* asm body only: 16-bit samples at %0 are shifted right by 7 and stored as bytes at %1 */\
+    "mov %2, %%"REG_a"                    \n\t" /* start index; see the jnc note below */\
+    ASMALIGN(4) /* FIXME Unroll? */\
+    "1:                                   \n\t"\
+    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
+    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
+    "psraw                 $7, %%mm0      \n\t"\
+    "psraw                 $7, %%mm1      \n\t"\
+    "packuswb           %%mm1, %%mm0      \n\t" /* saturate 8 words to unsigned bytes */\
+    MOVNTQ(%%mm0, (%1, %%REGa))\
+    "add                   $8, %%"REG_a"  \n\t"\
+    "jnc                   1b             \n\t" /* loops until the add carries, so %2 is presumably a negative count with %0/%1 pointing at the buffer ends -- confirm at call sites */
+
+#define YSCALEYUV2YV121_ACCURATE /* as YSCALEYUV2YV121, but adds 64 with saturation before the >>7 */\
+    "mov %2, %%"REG_a"                    \n\t"\
+    "pcmpeqw %%mm7, %%mm7                 \n\t" /* all bits set */\
+    "psrlw                 $15, %%mm7     \n\t" /* 1 in every word */\
+    "psllw                  $6, %%mm7     \n\t" /* 64 in every word: half of the 1<<7 divisor */\
+    ASMALIGN(4) /* FIXME Unroll? */\
+    "1:                                   \n\t"\
+    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
+    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
+    "paddsw             %%mm7, %%mm0      \n\t"\
+    "paddsw             %%mm7, %%mm1      \n\t"\
+    "psraw                 $7, %%mm0      \n\t"\
+    "psraw                 $7, %%mm1      \n\t"\
+    "packuswb           %%mm1, %%mm0      \n\t"\
+    MOVNTQ(%%mm0, (%1, %%REGa))\
+    "add                   $8, %%"REG_a"  \n\t"\
+    "jnc                   1b             \n\t" /* loops until the add carries, so %2 is presumably a negative count -- confirm at call sites */
+
+/* NOTE: stale operand list kept for reference; the operands actually used are in YSCALEYUV2PACKEDX_END.
+    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
+       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
+       "r" (dest), "m" (dstW),
+       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
+    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
+*/
+#define YSCALEYUV2PACKEDX_UV /* opens the asm statement; YSCALEYUV2PACKEDX_END closes it */\
+    __asm__ volatile(\
+    "xor                   %%"REG_a", %%"REG_a"     \n\t"\
+    ASMALIGN(4)\
+    "nop                                            \n\t"\
+    "1:                                             \n\t"\
+    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t" /* first chroma filter entry */\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t" /* U and V accumulators start at the rounder */\
+    "movq                      %%mm3, %%mm4         \n\t"\
+    ASMALIGN(4)\
+    "2:                                             \n\t"\
+    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
+    "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
+    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
+    "add                         $16, %%"REG_d"     \n\t"\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "pmulhw                    %%mm0, %%mm2         \n\t"\
+    "pmulhw                    %%mm0, %%mm5         \n\t"\
+    "paddw                     %%mm2, %%mm3         \n\t" /* mm3 accumulates U */\
+    "paddw                     %%mm5, %%mm4         \n\t" /* mm4 accumulates V */\
+    "test                  %%"REG_S", %%"REG_S"     \n\t" /* NULL source pointer ends the tap list */\
+    " jnz                         2b                \n\t"\
+
+#define YSCALEYUV2PACKEDX_YA(offset) /* same tap loop for two quadwords of luma-type data; sums end up in mm1 and mm7 */\
+    "lea                "offset"(%0), %%"REG_d"     \n\t"\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "movq      "VROUNDER_OFFSET"(%0), %%mm1         \n\t" /* both accumulators start at the rounder */\
+    "movq                      %%mm1, %%mm7         \n\t"\
+    ASMALIGN(4)\
+    "2:                                             \n\t"\
+    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
+    "movq  (%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y1srcData */\
+    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5         \n\t" /* Y2srcData */\
+    "add                         $16, %%"REG_d"            \n\t"\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "pmulhw                    %%mm0, %%mm2         \n\t"\
+    "pmulhw                    %%mm0, %%mm5         \n\t"\
+    "paddw                     %%mm2, %%mm1         \n\t"\
+    "paddw                     %%mm5, %%mm7         \n\t"\
+    "test                  %%"REG_S", %%"REG_S"     \n\t" /* NULL source pointer ends the tap list */\
+    " jnz                         2b                \n\t"\
+
+#define YSCALEYUV2PACKEDX /* chroma pass (results in mm3/mm4) followed by the luma pass (mm1/mm7) */\
+    YSCALEYUV2PACKEDX_UV \
+    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET) \
+
+#define YSCALEYUV2PACKEDX_END                 /* operand/clobber lists closing the asm opened by the *_UV macros */\
+    :: "r" (&c->redDither),                   /* %0: base for every *_OFFSET displacement */\
+        "m" (dummy), "m" (dummy), "m" (dummy),/* %1-%3: placeholders so dest and dstW stay %4 and %5 */\
+        "r" (dest), "m" (dstW)                \
+    : "%"REG_a, "%"REG_d, "%"REG_S            \
+    );
+
+#define YSCALEYUV2PACKEDX_ACCURATE_UV /* opens the asm statement; chroma sums with 32-bit accumulation go to U_TEMP/V_TEMP */\
+    __asm__ volatile(\
+    "xor %%"REG_a", %%"REG_a"                       \n\t"\
+    ASMALIGN(4)\
+    "nop                                            \n\t"\
+    "1:                                             \n\t"\
+    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "pxor                      %%mm4, %%mm4         \n\t" /* mm4/mm5 accumulate U, mm6/mm7 accumulate V */\
+    "pxor                      %%mm5, %%mm5         \n\t"\
+    "pxor                      %%mm6, %%mm6         \n\t"\
+    "pxor                      %%mm7, %%mm7         \n\t"\
+    ASMALIGN(4)\
+    "2:                                             \n\t"\
+    "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
+    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
+    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t" /* second source pointer of this entry */\
+    "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
+    "movq                      %%mm0, %%mm3         \n\t"\
+    "punpcklwd                 %%mm1, %%mm0         \n\t"\
+    "punpckhwd                 %%mm1, %%mm3         \n\t"\
+    "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
+    "pmaddwd                   %%mm1, %%mm0         \n\t"\
+    "pmaddwd                   %%mm1, %%mm3         \n\t"\
+    "paddd                     %%mm0, %%mm4         \n\t"\
+    "paddd                     %%mm3, %%mm5         \n\t"\
+    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
+    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t" /* first pointer of the next entry */\
+    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
+    "test                  %%"REG_S", %%"REG_S"     \n\t" /* NULL pointer ends the tap list */\
+    "movq                      %%mm2, %%mm0         \n\t"\
+    "punpcklwd                 %%mm3, %%mm2         \n\t"\
+    "punpckhwd                 %%mm3, %%mm0         \n\t"\
+    "pmaddwd                   %%mm1, %%mm2         \n\t"\
+    "pmaddwd                   %%mm1, %%mm0         \n\t"\
+    "paddd                     %%mm2, %%mm6         \n\t"\
+    "paddd                     %%mm0, %%mm7         \n\t"\
+    " jnz                         2b                \n\t"\
+    "psrad                       $16, %%mm4         \n\t"\
+    "psrad                       $16, %%mm5         \n\t"\
+    "psrad                       $16, %%mm6         \n\t"\
+    "psrad                       $16, %%mm7         \n\t"\
+    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
+    "packssdw                  %%mm5, %%mm4         \n\t"\
+    "packssdw                  %%mm7, %%mm6         \n\t"\
+    "paddw                     %%mm0, %%mm4         \n\t"\
+    "paddw                     %%mm0, %%mm6         \n\t"\
+    "movq                      %%mm4, "U_TEMP"(%0)  \n\t" /* spill U: the following luma pass reuses mm4 */\
+    "movq                      %%mm6, "V_TEMP"(%0)  \n\t" /* spill V */\
+
+#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) /* luma-type pass with 32-bit accumulation; leaves sums in mm1/mm7 and reloads U/V into mm3/mm4 */\
+    "lea                "offset"(%0), %%"REG_d"     \n\t"\
+    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "pxor                      %%mm1, %%mm1         \n\t"\
+    "pxor                      %%mm5, %%mm5         \n\t"\
+    "pxor                      %%mm7, %%mm7         \n\t"\
+    "pxor                      %%mm6, %%mm6         \n\t"\
+    ASMALIGN(4)\
+    "2:                                             \n\t"\
+    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
+    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
+    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t" /* second source pointer of this entry */\
+    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
+    "movq                      %%mm0, %%mm3         \n\t"\
+    "punpcklwd                 %%mm4, %%mm0         \n\t"\
+    "punpckhwd                 %%mm4, %%mm3         \n\t"\
+    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
+    "pmaddwd                   %%mm4, %%mm0         \n\t"\
+    "pmaddwd                   %%mm4, %%mm3         \n\t"\
+    "paddd                     %%mm0, %%mm1         \n\t"\
+    "paddd                     %%mm3, %%mm5         \n\t"\
+    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
+    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t" /* first pointer of the next entry */\
+    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
+    "test                  %%"REG_S", %%"REG_S"     \n\t" /* NULL pointer ends the tap list */\
+    "movq                      %%mm2, %%mm0         \n\t"\
+    "punpcklwd                 %%mm3, %%mm2         \n\t"\
+    "punpckhwd                 %%mm3, %%mm0         \n\t"\
+    "pmaddwd                   %%mm4, %%mm2         \n\t"\
+    "pmaddwd                   %%mm4, %%mm0         \n\t"\
+    "paddd                     %%mm2, %%mm7         \n\t"\
+    "paddd                     %%mm0, %%mm6         \n\t"\
+    " jnz                         2b                \n\t"\
+    "psrad                       $16, %%mm1         \n\t"\
+    "psrad                       $16, %%mm5         \n\t"\
+    "psrad                       $16, %%mm7         \n\t"\
+    "psrad                       $16, %%mm6         \n\t"\
+    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
+    "packssdw                  %%mm5, %%mm1         \n\t"\
+    "packssdw                  %%mm6, %%mm7         \n\t"\
+    "paddw                     %%mm0, %%mm1         \n\t"\
+    "paddw                     %%mm0, %%mm7         \n\t"\
+    "movq               "U_TEMP"(%0), %%mm3         \n\t" /* reload the values stored at U_TEMP/V_TEMP */\
+    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
+
+#define YSCALEYUV2PACKEDX_ACCURATE /* chroma pass (spilled to U_TEMP/V_TEMP), then luma pass */\
+    YSCALEYUV2PACKEDX_ACCURATE_UV \
+    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
+
+#define YSCALEYUV2RGBX /* in: mm1/mm7 = Y, mm3 = U, mm4 = V; out: packed bytes B in mm2, R in mm5, G in mm4 */\
+    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
+    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
+    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
+    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
+/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
+    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
+    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
+    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
+/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw           %%mm3, %%mm4       \n\t" /* combined green term ug+vg */\
+    "movq            %%mm2, %%mm0       \n\t"\
+    "movq            %%mm5, %%mm6       \n\t"\
+    "movq            %%mm4, %%mm3       \n\t"\
+    "punpcklwd       %%mm2, %%mm2       \n\t" /* duplicate each of the low chroma terms for the Y1 samples */\
+    "punpcklwd       %%mm5, %%mm5       \n\t"\
+    "punpcklwd       %%mm4, %%mm4       \n\t"\
+    "paddw           %%mm1, %%mm2       \n\t"\
+    "paddw           %%mm1, %%mm5       \n\t"\
+    "paddw           %%mm1, %%mm4       \n\t"\
+    "punpckhwd       %%mm0, %%mm0       \n\t" /* duplicate each of the high chroma terms for the Y2 samples */\
+    "punpckhwd       %%mm6, %%mm6       \n\t"\
+    "punpckhwd       %%mm3, %%mm3       \n\t"\
+    "paddw           %%mm7, %%mm0       \n\t"\
+    "paddw           %%mm7, %%mm6       \n\t"\
+    "paddw           %%mm7, %%mm3       \n\t"\
+    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = built from Y1/mm1, 2 = built from Y2/mm7) */\
+    "packuswb        %%mm0, %%mm2       \n\t"\
+    "packuswb        %%mm6, %%mm5       \n\t"\
+    "packuswb        %%mm3, %%mm4       \n\t"\
+
+#define REAL_YSCALEYUV2PACKED(index, c) /* blend of two lines; results: mm3 = U, mm4 = V, mm1/mm7 = Y */\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
+    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
+    "psraw                $3, %%mm0                           \n\t"\
+    "psraw                $3, %%mm1                           \n\t"\
+    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t" /* NOTE: the >>3 weights are written back to the context */\
+    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
+    "xor            "#index", "#index"                        \n\t"\
+    ASMALIGN(4)\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
+    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
+    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax], next 8 bytes*/\
+    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax], next 8 bytes*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
+    "psraw                $7, %%mm7     \n\t" /* buf1[eax] >>7*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c) /* extra level so index/c are macro-expanded before # stringifies them */
+
+#define REAL_YSCALEYUV2RGB_UV(index, c) /* chroma half of the 2-line blend for RGB output: opens loop label 1, blends uvbuf0/uvbuf1 and starts the green terms */\
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ASMALIGN(4)\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
+    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
+    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
+    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
+    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+
+#define REAL_YSCALEYUV2RGB_YA(index, c) /* luma half of the 2-line blend: mm1 = samples index..index+3, mm7 = samples index+4..index+7; clobbers mm0 and mm6 */\
+    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
+    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax+4]*/\
+    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax+4]*/\
+    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
+    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax+4] - buf1[eax+4]*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
+    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax+4] - buf1[eax+4])yalpha1>>16*/\
+    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf1[eax+4] >>4*/\
+    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
+    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax+4]yalpha1 + buf1[eax+4](1-yalpha1) >>16*/\
+
+#define REAL_YSCALEYUV2RGB_COEFF(c) /* in: mm1/mm7 = luma, mm2/mm5 = offset-removed U/V, mm3/mm4 = ug/vg; out: 8 bytes each of B (mm2), G (mm4), R (mm5) */\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate each of the low two chroma terms: one chroma sample covers two luma samples */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two chroma terms */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* words -> unsigned saturated bytes, first group in the low half */\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB_YA(index, c) REAL_YSCALEYUV2RGB_YA(index, c) /* extra macro level: arguments are macro-expanded here before REAL_* stringifies them with # */
+
+#define YSCALEYUV2RGB(index, c) /* full 2-line blend + YUV->RGB step: chroma blend, luma blend, then coefficient stage */\
+    REAL_YSCALEYUV2RGB_UV(index, c) \
+    REAL_YSCALEYUV2RGB_YA(index, c) \
+    REAL_YSCALEYUV2RGB_COEFF(c)
+
+#define REAL_YSCALEYUV2PACKED1(index, c) /* single-source-line variant, no RGB conversion: reads only operands %0 and %2, shifts every sample right by 7; opens loop label 1 */\
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ASMALIGN(4)\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "psraw                $7, %%mm3     \n\t" \
+    "psraw                $7, %%mm4     \n\t" \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
+    "psraw                $7, %%mm1     \n\t" \
+    "psraw                $7, %%mm7     \n\t" \
+
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c) /* extra macro level: arguments are macro-expanded here before REAL_* stringifies them with # */
+
+#define REAL_YSCALEYUV2RGB1(index, c) /* single-source-line YUV->RGB step: reads only operands %0 and %2; out: 8 bytes each of B (mm2), G (mm4), R (mm5); opens loop label 1 */\
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ASMALIGN(4)\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
+    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] >>4*/\
+    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] >>4*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4*/\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate each of the low two chroma terms: one chroma sample covers two luma samples */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two chroma terms */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* words -> unsigned saturated bytes, first group in the low half */\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c) /* extra macro level: arguments are macro-expanded here before REAL_* stringifies them with # */
+
+#define REAL_YSCALEYUV2PACKED1b(index, c) /* like REAL_YSCALEYUV2PACKED1, but chroma is the sum of uvbuf0 and uvbuf1 shifted right by 8 (i.e. their average >>7); luma from operand %0 only */\
+    "xor "#index", "#index"             \n\t" /* index = 0 */\
+    ASMALIGN(4)\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
+    "psrlw                $8, %%mm3     \n\t" /* logical shift: the 16-bit sum is treated as unsigned */\
+    "psrlw                $8, %%mm4     \n\t" \
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
+    "psraw                $7, %%mm1     \n\t" \
+    "psraw                $7, %%mm7     \n\t"
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c) /* extra macro level: arguments are macro-expanded here before REAL_* stringifies them with # */
+
+// single luma line (operand %0), chroma averaged from uvbuf0 and uvbuf1: does vertical chrominance interpolation only
+#define REAL_YSCALEYUV2RGB1b(index, c) \
+    "xor            "#index", "#index"  \n\t" /* index = 0 */\
+    ASMALIGN(4)\
+    "1:                                 \n\t"\
+    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
+    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
+    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
+    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
+    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
+    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
+    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
+    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
+    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
+    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
+    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
+    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
+    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
+    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
+    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
+    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
+    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
+    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
+    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4*/\
+    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
+    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
+    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
+    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
+    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
+    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
+    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
+    "paddw             %%mm3, %%mm4     \n\t" /* ug + vg */\
+    "movq              %%mm2, %%mm0     \n\t"\
+    "movq              %%mm5, %%mm6     \n\t"\
+    "movq              %%mm4, %%mm3     \n\t"\
+    "punpcklwd         %%mm2, %%mm2     \n\t" /* duplicate each of the low two chroma terms: one chroma sample covers two luma samples */\
+    "punpcklwd         %%mm5, %%mm5     \n\t"\
+    "punpcklwd         %%mm4, %%mm4     \n\t"\
+    "paddw             %%mm1, %%mm2     \n\t"\
+    "paddw             %%mm1, %%mm5     \n\t"\
+    "paddw             %%mm1, %%mm4     \n\t"\
+    "punpckhwd         %%mm0, %%mm0     \n\t" /* same for the high two chroma terms */\
+    "punpckhwd         %%mm6, %%mm6     \n\t"\
+    "punpckhwd         %%mm3, %%mm3     \n\t"\
+    "paddw             %%mm7, %%mm0     \n\t"\
+    "paddw             %%mm7, %%mm6     \n\t"\
+    "paddw             %%mm7, %%mm3     \n\t"\
+    /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
+    "packuswb          %%mm0, %%mm2     \n\t" /* words -> unsigned saturated bytes, first group in the low half */\
+    "packuswb          %%mm6, %%mm5     \n\t"\
+    "packuswb          %%mm3, %%mm4     \n\t"\
+
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c) /* extra macro level: arguments are macro-expanded here before REAL_* stringifies them with # */
+
+#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) /* interleave 8 bytes each of b,g,r,a into eight 4-byte pixels (b in the lowest byte), store them and close loop 1; q0,q2,q3,t are scratch */\
+    "movq       "#b", "#q2"     \n\t" /* B */\
+    "movq       "#r", "#t"      \n\t" /* R */\
+    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
+    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
+    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
+    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
+    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
+    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
+    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
+    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
+    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
+    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
+\
+    MOVNTQ(   q0,   (dst, index, 4))\
+    MOVNTQ(    b,  8(dst, index, 4))\
+    MOVNTQ(   q2, 16(dst, index, 4))\
+    MOVNTQ(   q3, 24(dst, index, 4))\
+\
+    "add      $8, "#index"      \n\t" /* 8 pixels written per iteration */\
+    "cmp "#dstw", "#index"      \n\t"\
+    " jb      1b                \n\t" /* loop while index < dstw (unsigned compare) */
+#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) /* extra macro level: arguments are macro-expanded before # stringification */
+
+#define REAL_WRITERGB16(dst, dstw, index) /* pack mm2=B, mm4=G, mm5=R (8 bytes each; mm7 must be 0) into eight 16-bit 5-6-5 pixels, store them and close loop 1 */\
+    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
+    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
+    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
+    "psrlq           $3, %%mm2  \n\t" /* B>>3 in every byte; the bits crossing byte borders were masked to 0 above */\
+\
+    "movq         %%mm2, %%mm1  \n\t"\
+    "movq         %%mm4, %%mm3  \n\t"\
+\
+    "punpcklbw    %%mm7, %%mm3  \n\t" /* G widened to words, pixels 0-3 */\
+    "punpcklbw    %%mm5, %%mm2  \n\t" /* words with R in the high byte and B>>3 in the low byte, pixels 0-3 */\
+    "punpckhbw    %%mm7, %%mm4  \n\t" /* G widened to words, pixels 4-7 */\
+    "punpckhbw    %%mm5, %%mm1  \n\t" /* R/B words, pixels 4-7 */\
+\
+    "psllq           $3, %%mm3  \n\t" /* move the 6 G bits to bits 5..10 of each word */\
+    "psllq           $3, %%mm4  \n\t"\
+\
+    "por          %%mm3, %%mm2  \n\t"\
+    "por          %%mm4, %%mm1  \n\t"\
+\
+    MOVNTQ(%%mm2,  (dst, index, 2))\
+    MOVNTQ(%%mm1, 8(dst, index, 2))\
+\
+    "add             $8, "#index"   \n\t" /* 8 pixels written per iteration */\
+    "cmp        "#dstw", "#index"   \n\t"\
+    " jb             1b             \n\t" /* loop while index < dstw (unsigned compare) */
+#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index) /* extra macro level: arguments are macro-expanded before # stringification */
+
+#define REAL_WRITERGB15(dst, dstw, index) /* pack mm2=B, mm4=G, mm5=R (8 bytes each; mm7 must be 0) into eight 16-bit 5-5-5 pixels, store them and close loop 1 */\
+    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
+    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
+    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
+    "psrlq           $3, %%mm2  \n\t" /* B>>3 in every byte; the bits crossing byte borders were masked to 0 above */\
+    "psrlq           $1, %%mm5  \n\t" /* R>>1 in every byte, leaving the top bit of each pixel clear */\
+\
+    "movq         %%mm2, %%mm1  \n\t"\
+    "movq         %%mm4, %%mm3  \n\t"\
+\
+    "punpcklbw    %%mm7, %%mm3  \n\t" /* G widened to words, pixels 0-3 */\
+    "punpcklbw    %%mm5, %%mm2  \n\t" /* words with R>>1 in the high byte and B>>3 in the low byte, pixels 0-3 */\
+    "punpckhbw    %%mm7, %%mm4  \n\t" /* G widened to words, pixels 4-7 */\
+    "punpckhbw    %%mm5, %%mm1  \n\t" /* R/B words, pixels 4-7 */\
+\
+    "psllq           $2, %%mm3  \n\t" /* move the 5 G bits to bits 5..9 of each word */\
+    "psllq           $2, %%mm4  \n\t"\
+\
+    "por          %%mm3, %%mm2  \n\t"\
+    "por          %%mm4, %%mm1  \n\t"\
+\
+    MOVNTQ(%%mm2,  (dst, index, 2))\
+    MOVNTQ(%%mm1, 8(dst, index, 2))\
+\
+    "add             $8, "#index"   \n\t" /* 8 pixels written per iteration */\
+    "cmp        "#dstw", "#index"   \n\t"\
+    " jb             1b             \n\t" /* loop while index < dstw (unsigned compare) */
+#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index) /* extra macro level: arguments are macro-expanded before # stringification */
+
+#define WRITEBGR24OLD(dst, dstw, index) /* mask-based 24-bit writer: 8 pixels -> 24 bytes at dst, advances dst, closes loop 1; NOTE(review): not selected by the WRITEBGR24 definitions in this part of the file */\
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
+    "movq      %%mm2, %%mm1             \n\t" /* B */\
+    "movq      %%mm5, %%mm6             \n\t" /* R */\
+    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
+    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
+    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
+    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
+    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
+    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
+    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
+    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
+    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
+    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
+\
+    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
+    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
+    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
+    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
+    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
+    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
+    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
+    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
+\
+    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
+    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
+    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
+    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
+    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
+    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
+    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
+    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
+    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
+    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
+    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
+    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
+    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
+\
+    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
+    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
+    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
+    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
+    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
+    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
+    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
+    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
+\
+    MOVNTQ(%%mm0,   (dst))\
+    MOVNTQ(%%mm2,  8(dst))\
+    MOVNTQ(%%mm3, 16(dst))\
+    "add         $24, "#dst"            \n\t" /* 8 pixels * 3 bytes */\
+\
+    "add          $8, "#index"          \n\t"\
+    "cmp     "#dstw", "#index"          \n\t"\
+    " jb          1b                    \n\t" /* loop while index < dstw (unsigned compare) */
+
+#define WRITEBGR24MMX(dst, dstw, index) /* plain-MMX 24-bit writer: 8 pixels -> 24 bytes at dst, advances dst by 24 and closes loop 1; overwrites mm0-mm7 */\
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
+    "movq      %%mm2, %%mm1     \n\t" /* B */\
+    "movq      %%mm5, %%mm6     \n\t" /* R */\
+    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
+    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
+    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
+    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
+    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
+    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
+    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
+    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
+    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
+    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
+\
+    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
+    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
+    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
+    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
+\
+    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
+    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
+    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
+    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
+\
+    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
+    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
+    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
+    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
+\
+    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
+    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
+    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
+    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
+    MOVNTQ(%%mm0, (dst))\
+\
+    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
+    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
+    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
+    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
+    MOVNTQ(%%mm6, 8(dst))\
+\
+    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
+    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
+    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
+    MOVNTQ(%%mm5, 16(dst))\
+\
+    "add         $24, "#dst"    \n\t" /* 8 pixels * 3 bytes */\
+\
+    "add          $8, "#index"  \n\t"\
+    "cmp     "#dstw", "#index"  \n\t"\
+    " jb          1b            \n\t" /* loop while index < dstw (unsigned compare) */
+
+#define WRITEBGR24MMX2(dst, dstw, index) /* pshufw-based 24-bit writer: 8 pixels -> 24 bytes at dst, advances dst by 24 and closes loop 1; uses mm0/mm7 as masks */\
+    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
+    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
+    "movq "MANGLE(ff_M24C)", %%mm7 \n\t" /* the incoming mm7 value is overwritten here */\
+    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
+    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
+    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
+\
+    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
+    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
+    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
+\
+    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
+    "por    %%mm1, %%mm6        \n\t"\
+    "por    %%mm3, %%mm6        \n\t"\
+    MOVNTQ(%%mm6, (dst))\
+\
+    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
+    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
+    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
+    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
+\
+    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
+    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
+    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
+\
+    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
+    "por    %%mm3, %%mm6        \n\t"\
+    MOVNTQ(%%mm6, 8(dst))\
+\
+    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
+    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
+    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
+\
+    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
+    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
+    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
+\
+    "por    %%mm1, %%mm3        \n\t"\
+    "por    %%mm3, %%mm6        \n\t"\
+    MOVNTQ(%%mm6, 16(dst))\
+\
+    "add      $24, "#dst"       \n\t" /* 8 pixels * 3 bytes */\
+\
+    "add       $8, "#index"     \n\t"\
+    "cmp  "#dstw", "#index"     \n\t"\
+    " jb       1b               \n\t" /* loop while index < dstw (unsigned compare) */
+
+#if HAVE_MMX2 /* the MMX2 writer relies on pshufw */
+#undef WRITEBGR24 /* drop any earlier definition; presumably this template is included more than once - confirm */
+#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
+#else /* !HAVE_MMX2 */
+#undef WRITEBGR24
+#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
+#endif /* HAVE_MMX2 */
+
+#define REAL_WRITEYUY2(dst, dstw, index) /* in: mm1/mm7 = 8 luma words, mm3/mm4 = 4 words of each chroma plane; stores 16 interleaved bytes and closes loop 1 */\
+    "packuswb  %%mm3, %%mm3     \n\t" /* first chroma plane: words -> unsigned saturated bytes */\
+    "packuswb  %%mm4, %%mm4     \n\t" /* second chroma plane likewise */\
+    "packuswb  %%mm7, %%mm1     \n\t" /* 8 luma bytes: mm1 in the low half, mm7 in the high half */\
+    "punpcklbw %%mm4, %%mm3     \n\t" /* chroma pairs: mm3[0] mm4[0] mm3[1] mm4[1] ... */\
+    "movq      %%mm1, %%mm7     \n\t"\
+    "punpcklbw %%mm3, %%mm1     \n\t" /* luma 0-3 interleaved with chroma: Y c Y c ... */\
+    "punpckhbw %%mm3, %%mm7     \n\t" /* luma 4-7 interleaved with chroma */\
+\
+    MOVNTQ(%%mm1, (dst, index, 2))\
+    MOVNTQ(%%mm7, 8(dst, index, 2))\
+\
+    "add          $8, "#index"  \n\t" /* 8 pixels written per iteration */\
+    "cmp     "#dstw", "#index"  \n\t"\
+    " jb          1b            \n\t" /* loop while index < dstw (unsigned compare) */
+#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index) /* extra macro level: arguments are macro-expanded before # stringification */
+
+
+static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
+{   /* Vertical filter to planar output: MMX macros unless SWS_BITEXACT is set, otherwise the AltiVec or plain C routine. */
+#if HAVE_MMX
+    if(!(c->flags & SWS_BITEXACT)){
+        if (c->flags & SWS_ACCURATE_RND){
+            if (uDest){ /* chroma planes are optional; vDest is used whenever uDest is set */
+                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) /* second chroma plane sits VOF bytes into the same source rows */
+            }
+
+            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+        }else{
+            if (uDest){
+                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
+                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
+            }
+
+            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
+        }
+        return;
+    }
+#endif /* HAVE_MMX */
+#if HAVE_ALTIVEC
+yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
+                      chrFilter, chrSrc, chrFilterSize,
+                      dest, uDest, vDest, dstW, chrDstW);
+#else //HAVE_ALTIVEC
+yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
+            chrFilter, chrSrc, chrFilterSize,
+            dest, uDest, vDest, dstW, chrDstW);
+#endif //!HAVE_ALTIVEC
+}
+
+static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
+{   /* No optimized variant here: forwards every argument except c (unused) to the C implementation. */
+yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
+             chrFilter, chrSrc, chrFilterSize,
+             dest, uDest, dstW, chrDstW, dstFormat);
+}
+
+static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
+{   /* Converts one row of 16-bit intermediate samples to 8-bit planes without vertical filtering; chroma is written only if uDest is non-NULL. */
+    int i;
+#if HAVE_MMX
+    if(!(c->flags & SWS_BITEXACT)){
+        long p= uDest ? 3 : 1; /* planes to process: luma only, or luma + both chroma planes */
+        int16_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; /* element type matches the int16_t source rows; each entry points at the END of its row */
+        uint8_t *dst[3]= {dest, uDest, vDest};
+        long counter[3] = {dstW, chrDstW, chrDstW};
+
+        if (c->flags & SWS_ACCURATE_RND){
+            while(p--){
+                __asm__ volatile(
+                    YSCALEYUV2YV121_ACCURATE
+                    :: "r" (src[p]), "r" (dst[p] + counter[p]), /* end-of-row pointers plus a negative count; presumably the asm indexes upward to 0 - confirm against the macro */
+                    "g" (-counter[p])
+                    : "%"REG_a
+                );
+            }
+        }else{
+            while(p--){
+                __asm__ volatile(
+                    YSCALEYUV2YV121
+                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
+                    "g" (-counter[p])
+                    : "%"REG_a
+                );
+            }
+        }
+        return;
+    }
+#endif /* HAVE_MMX */
+    for (i=0; i<dstW; i++) /* C fallback, luma */
+    {
+        int val= (lumSrc[i]+64)>>7; /* drop 7 fractional bits, rounding to nearest */
+
+        if (val&256){ /* for int16_t input val lies in [-256, 256], so bit 8 is set exactly when val is outside 0..255 */
+            if (val<0) val=0;
+            else       val=255;
+        }
+
+        dest[i]= val;
+    }
+
+    if (uDest)
+        for (i=0; i<chrDstW; i++)
+        {
+            int u=(chrSrc[i       ]+64)>>7;
+            int v=(chrSrc[i + VOFW]+64)>>7; /* second chroma plane is stored VOFW samples after the first */
+
+            if ((u|v)&256){ /* cheap combined out-of-range test for both values */
+                if (u<0)        u=0;
+                else if (u>255) u=255;
+                if (v<0)        v=0;
+                else if (v>255) v=255;
+            }
+
+            uDest[i]= u;
+            vDest[i]= v;
+        }
+}
+
+
+/**
+ * Vertical scale (lumFilter/chrFilter coefficient sets) of planar YUV to packed output: MMX for RGB32, BGR24, RGB555, RGB565, YUYV422; else AltiVec or C.
+ */
+static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                                       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                                       uint8_t *dest, long dstW, long dstY)
+{
+#if HAVE_MMX
+    long dummy=0; /* placeholder for the unused asm operands %1..%3 */
+    if(!(c->flags & SWS_BITEXACT)){
+        if (c->flags & SWS_ACCURATE_RND){
+            switch(c->dstFormat){ /* formats without a case fall through to the AltiVec/C code below */
+            case PIX_FMT_RGB32:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pcmpeqd %%mm7, %%mm7 \n\t" /* mm7 = all ones, passed below as the alpha bytes */
+                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_BGR24:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
+                "add %4, %%"REG_c"                        \n\t" /* REG_c = dest + 3*REG_a */
+                WRITEBGR24(%%REGc, %5, %%REGa)
+
+                /* operand list written out instead of YSCALEYUV2PACKEDX_END; it additionally clobbers REG_c */
+                :: "r" (&c->redDither),
+                "m" (dummy), "m" (dummy), "m" (dummy),
+                "r" (dest), "m" (dstW)
+                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+                );
+                return;
+            case PIX_FMT_RGB555:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+
+                WRITERGB15(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_RGB565:
+                YSCALEYUV2PACKEDX_ACCURATE
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
+#endif
+
+                WRITERGB16(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_YUYV422:
+                YSCALEYUV2PACKEDX_ACCURATE
+                /* no RGB conversion: mm1/mm7 = luma, mm3/mm4 = chroma, shifted right 3 bits before packing */
+
+                "psraw $3, %%mm3    \n\t"
+                "psraw $3, %%mm4    \n\t"
+                "psraw $3, %%mm1    \n\t"
+                "psraw $3, %%mm7    \n\t"
+                WRITEYUY2(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            }
+        }else{
+            switch(c->dstFormat)
+            {
+            case PIX_FMT_RGB32:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pcmpeqd %%mm7, %%mm7 \n\t" /* mm7 = all ones, passed below as the alpha bytes */
+                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_BGR24:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor                    %%mm7, %%mm7       \n\t"
+                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
+                "add                        %4, %%"REG_c"   \n\t" /* REG_c = dest + 3*REG_a */
+                WRITEBGR24(%%REGc, %5, %%REGa)
+
+                :: "r" (&c->redDither),
+                "m" (dummy), "m" (dummy), "m" (dummy),
+                "r" (dest),  "m" (dstW)
+                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+                );
+                return;
+            case PIX_FMT_RGB555:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+
+                WRITERGB15(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_RGB565:
+                YSCALEYUV2PACKEDX
+                YSCALEYUV2RGBX
+                "pxor %%mm7, %%mm7 \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
+                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
+                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
+#endif
+
+                WRITERGB16(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            case PIX_FMT_YUYV422:
+                YSCALEYUV2PACKEDX
+                /* no RGB conversion: mm1/mm7 = luma, mm3/mm4 = chroma, shifted right 3 bits before packing */
+
+                "psraw $3, %%mm3    \n\t"
+                "psraw $3, %%mm4    \n\t"
+                "psraw $3, %%mm1    \n\t"
+                "psraw $3, %%mm7    \n\t"
+                WRITEYUY2(%4, %5, %%REGa)
+                YSCALEYUV2PACKEDX_END
+                return;
+            }
+        }
+    }
+#endif /* HAVE_MMX */
+#if HAVE_ALTIVEC
+    /* The following list of supported dstFormat values should
+       match what's found in the body of altivec_yuv2packedX() */
+    if (!(c->flags & SWS_BITEXACT) &&
+       (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
+        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
+        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
+            altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
+                                 chrFilter, chrSrc, chrFilterSize,
+                                 dest, dstW, dstY);
+    else
+#endif /* HAVE_ALTIVEC */
+        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
+                       chrFilter, chrSrc, chrFilterSize,
+                       dest, dstW, dstY);
+}
+
+/** Vertical bilinear scale YV12 to RGB (or to whichever packed format c->dstFormat selects).
+ * buf0/buf1 are two luma input lines and uvbuf0/uvbuf1 two chroma input lines; yalpha/uvalpha are presumably their blend weights on a 0..4095 scale (TODO confirm in the YSCALE* macros); dest is the output line.
+ * With HAVE_MMX and without SWS_BITEXACT, five formats are written by inline asm; every other case falls through to the C macro at the end. */
+static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
+                          uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+    int  yalpha1=4095- yalpha;
+    int uvalpha1=4095-uvalpha;
+    int i;
+    /* yalpha1, uvalpha1 and i are not referenced by name below; presumably the YSCALE_YUV_2_*_C macros expanded at the end use them. */
+#if HAVE_MMX
+    if(!(c->flags & SWS_BITEXACT)){
+        switch(c->dstFormat)
+        {
+            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
+            case PIX_FMT_RGB32:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* park REG_b at ESP_OFFSET relative to &c->redDither (presumably a scratch slot in the context) */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP is handed to the macros below, so save it first */
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones */
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+                /* operands: %0=buf0 %1=buf1 %2=uvbuf0 %3=uvbuf1 (fixed registers c, d, S, D), %4=dest (memory), %5=&c->redDither (register a) */
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_BGR24: /* same register protocol and operand list as the RGB32 case */
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
+                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB555:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* saturating byte add of the dither values addressed relative to &c->redDither */
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB565:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* same optional dither step as in the RGB555 case */
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_YUYV422: /* packed YUV output: uses the PACKED macro instead of the RGB conversion */
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov %4, %%"REG_b"                        \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2PACKED(%%REGBP, %5)
+                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            default: break; /* no asm version for this format: use the C code below */
+        }
+    }
+#endif //HAVE_MMX
+YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) /* C path: reached without MMX, with SWS_BITEXACT, or for formats not handled above */
+}
+
+/** YV12 to RGB (or another packed format given by dstFormat) without scaling or interpolating: only one luma line, buf0, is supplied.
+ * uvalpha selects the chroma handling: below 2048 the "1" macro variants are used, otherwise the "1b" variants. With SWS_FULL_CHR_H_INT set the work is delegated to yuv2packed2.
+ */
+static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
+                          uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
+{
+    const int yalpha1=0;
+    int i;
+    /* buf1/yalpha below (and yalpha1/i above) are not referenced by name in the visible C code; presumably the C macros at the end expand to code that uses them. */
+    uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int yalpha= 4096; //FIXME ...
+    /* Full chroma interpolation requested: reuse the two-line routine with buf0 as both luma lines and a luma weight of 0. */
+    if (flags&SWS_FULL_CHR_H_INT)
+    {
+        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
+        return;
+    }
+    /* The MMX switches below have no default label: formats not listed leave the switch and reach the C code at the end. */
+#if HAVE_MMX
+    if(!(flags & SWS_BITEXACT)){
+        if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        {
+            switch(dstFormat)
+            {
+            case PIX_FMT_RGB32:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t" /* park REG_b at ESP_OFFSET relative to &c->redDither (presumably a scratch slot in the context) */
+                "mov        %4, %%"REG_b"               \n\t" /* REG_b = dest */
+                "push %%"REG_BP"                        \n\t" /* REG_BP is handed to the macros below, so save it first */
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones */
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t" /* restore REG_b */
+                /* operands: %0=buf0 %1=buf1 %2=uvbuf0 %3=uvbuf1 (fixed registers c, d, S, D), %4=dest (memory), %5=&c->redDither (register a) */
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_BGR24: /* same register protocol and operand list as the RGB32 case */
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
+                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB555:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* saturating byte add of the dither values addressed relative to &c->redDither */
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB565:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* same optional dither step as in the RGB555 case */
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_YUYV422: /* packed YUV output: uses the PACKED1 macro instead of the RGB conversion */
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2PACKED1(%%REGBP, %5)
+                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            }
+        }
+        else /* uvalpha >= 2048: identical structure, but built on the "1b" macro variants */
+        {
+            switch(dstFormat)
+            {
+            case PIX_FMT_RGB32:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "pcmpeqd %%mm7, %%mm7                   \n\t" /* mm7 = all ones */
+                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_BGR24:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t" /* mm7 = 0 */
+                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB555:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* saturating byte add of the dither values addressed relative to &c->redDither */
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_RGB565:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "pxor    %%mm7, %%mm7                   \n\t"
+                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP /* same optional dither step as in the RGB555 case */
+                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
+                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+#endif
+
+                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            case PIX_FMT_YUYV422:
+                __asm__ volatile(
+                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
+                "mov        %4, %%"REG_b"               \n\t"
+                "push %%"REG_BP"                        \n\t"
+                YSCALEYUV2PACKED1b(%%REGBP, %5)
+                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+                "pop %%"REG_BP"                         \n\t"
+                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+
+                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+                "a" (&c->redDither)
+                );
+                return;
+            }
+        }
+    }
+#endif /* HAVE_MMX */
+    if (uvalpha < 2048) /* C path: same uvalpha split as the MMX code above */
+    {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    }else{
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+    }
+}
+
+//FIXME yuy2* can read up to 7 samples too much
+
+static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+{ /* Luma extraction from packed input: dst[i] = src[2*i] for i < width (see the C fallback). 'unused' is ignored. */
+#if HAVE_MMX
+    __asm__ volatile(
+    "movq "MANGLE(bm01010101)", %%mm2           \n\t" /* mm2 = byte mask (presumably 0x00FF in every 16-bit word -- confirm at its definition) */
+    "mov                    %0, %%"REG_a"       \n\t" /* REG_a = -width; it counts up towards zero */
+    "1:                                         \n\t"
+    "movq    (%1, %%"REG_a",2), %%mm0           \n\t" /* load 16 source bytes; %1 points at the end of the source line */
+    "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
+    "pand                %%mm2, %%mm0           \n\t"
+    "pand                %%mm2, %%mm1           \n\t"
+    "packuswb            %%mm1, %%mm0           \n\t" /* 8 words -> 8 bytes */
+    "movq                %%mm0, (%2, %%"REG_a") \n\t" /* store 8 output samples; %2 points at the end of dst */
+    "add                    $8, %%"REG_a"       \n\t" /* whole groups of 8 only, so the last group may run past width */
+    " js                    1b                  \n\t" /* loop while the counter is still negative */
+    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
+    : "%"REG_a
+    );
+#else
+    int i;
+    for (i=0; i<width; i++)
+        dst[i]= src[2*i];
+#endif
+}
+
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+{ /* Chroma extraction from packed input: dstU[i] = src1[4*i+1], dstV[i] = src1[4*i+3] (see the C fallback). Only src1 is read. */
+#if HAVE_MMX
+    __asm__ volatile(
+    "movq "MANGLE(bm01010101)", %%mm4           \n\t" /* mm4 = byte mask (presumably 0x00FF in every 16-bit word -- confirm at its definition) */
+    "mov                    %0, %%"REG_a"       \n\t" /* REG_a = -width; it counts up towards zero */
+    "1:                                         \n\t"
+    "movq    (%1, %%"REG_a",4), %%mm0           \n\t" /* load 16 source bytes = 4 chroma pairs */
+    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
+    "psrlw                  $8, %%mm0           \n\t" /* keep the high byte of every word, i.e. the odd source bytes */
+    "psrlw                  $8, %%mm1           \n\t"
+    "packuswb            %%mm1, %%mm0           \n\t" /* mm0 = 8 chroma bytes, alternating between the two planes */
+    "movq                %%mm0, %%mm1           \n\t"
+    "psrlw                  $8, %%mm0           \n\t" /* mm0: second byte of every pair */
+    "pand                %%mm4, %%mm1           \n\t" /* mm1: first byte of every pair */
+    "packuswb            %%mm0, %%mm0           \n\t"
+    "packuswb            %%mm1, %%mm1           \n\t"
+    "movd                %%mm0, (%3, %%"REG_a") \n\t" /* 4 samples to dstV */
+    "movd                %%mm1, (%2, %%"REG_a") \n\t" /* 4 samples to dstU */
+    "add                    $4, %%"REG_a"       \n\t"
+    " js                    1b                  \n\t" /* loop while the counter is still negative */
+    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
+    : "%"REG_a
+    );
+#else
+    int i;
+    for (i=0; i<width; i++)
+    {
+        dstU[i]= src1[4*i + 1];
+        dstV[i]= src1[4*i + 3];
+    }
+#endif
+    assert(src1 == src2); /* src2 is not used otherwise; callers are expected to pass the same pointer twice */
+}
+
+/* This is almost identical to the previous, and exists only because
+ * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
+static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+{ /* Luma extraction from packed input: dst[i] = src[2*i+1] for i < width (see the C fallback). 'unused' is ignored. */
+#if HAVE_MMX
+    __asm__ volatile(
+    "mov                  %0, %%"REG_a"         \n\t" /* REG_a = -width; it counts up towards zero */
+    "1:                                         \n\t"
+    "movq  (%1, %%"REG_a",2), %%mm0             \n\t" /* load 16 source bytes; %1 points at the end of the source line */
+    "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
+    "psrlw                $8, %%mm0             \n\t" /* keep the high byte of every word, i.e. the odd source bytes */
+    "psrlw                $8, %%mm1             \n\t"
+    "packuswb          %%mm1, %%mm0             \n\t" /* 8 words -> 8 bytes */
+    "movq              %%mm0, (%2, %%"REG_a")   \n\t" /* store 8 output samples; %2 points at the end of dst */
+    "add                  $8, %%"REG_a"         \n\t" /* whole groups of 8 only, so the last group may run past width */
+    " js                  1b                    \n\t" /* loop while the counter is still negative */
+    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
+    : "%"REG_a
+    );
+#else
+    int i;
+    for (i=0; i<width; i++)
+        dst[i]= src[2*i+1];
+#endif
+}
+
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+{ /* Chroma extraction from packed input: dstU[i] = src1[4*i+0], dstV[i] = src1[4*i+2] (see the C fallback). Only src1 is read. */
+#if HAVE_MMX
+    __asm__ volatile(
+    "movq "MANGLE(bm01010101)", %%mm4           \n\t" /* mm4 = byte mask (presumably 0x00FF in every 16-bit word -- confirm at its definition) */
+    "mov                    %0, %%"REG_a"       \n\t" /* REG_a = -width; it counts up towards zero */
+    "1:                                         \n\t"
+    "movq    (%1, %%"REG_a",4), %%mm0           \n\t" /* load 16 source bytes = 4 chroma pairs */
+    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
+    "pand                %%mm4, %%mm0           \n\t" /* masking instead of shifting: this variant takes the even source bytes */
+    "pand                %%mm4, %%mm1           \n\t"
+    "packuswb            %%mm1, %%mm0           \n\t" /* mm0 = 8 chroma bytes, alternating between the two planes */
+    "movq                %%mm0, %%mm1           \n\t"
+    "psrlw                  $8, %%mm0           \n\t" /* mm0: second byte of every pair */
+    "pand                %%mm4, %%mm1           \n\t" /* mm1: first byte of every pair */
+    "packuswb            %%mm0, %%mm0           \n\t"
+    "packuswb            %%mm1, %%mm1           \n\t"
+    "movd                %%mm0, (%3, %%"REG_a") \n\t" /* 4 samples to dstV */
+    "movd                %%mm1, (%2, %%"REG_a") \n\t" /* 4 samples to dstU */
+    "add                    $4, %%"REG_a"       \n\t"
+    " js                    1b                  \n\t" /* loop while the counter is still negative */
+    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
+    : "%"REG_a
+    );
+#else
+    int i;
+    for (i=0; i<width; i++)
+    {
+        dstU[i]= src1[4*i + 0];
+        dstV[i]= src1[4*i + 2];
+    }
+#endif
+    assert(src1 == src2); /* src2 is not used otherwise; callers are expected to pass the same pointer twice */
+}
+
+#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
+static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\
+{   /* generated luma reader: split each 'type' pixel into r/g/b and store their weighted, rounded sum as one byte */\
+    const type *pixels= (const type*)src;\
+    int x;\
+    for (x=0; x<width; x++) {\
+        const type px= pixels[x];\
+        const int blue = (px>>shb)&maskb;\
+        const int green= (px>>shg)&maskg;\
+        const int red  = (px>>shr)&maskr;\
+        dst[x]= ((RY)*red + (GY)*green + (BY)*blue + (33<<((S)-1)))>>(S);\
+    }\
+}
+/* Instantiations: a channel extracted with shift 0 keeps its bit offset, so the coefficients are pre-shifted to put all three products on one fixed-point scale, which the final shift S removes. */
+BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
+BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
+BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
+BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
+BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
+BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
+
+#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
+static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
+{   /* full-resolution chroma: one U and one V byte per source pixel; 'dummy' and 'unused' are ignored */\
+    const type *pixels= (const type*)src;\
+    const int bias= 257<<((S)-1);\
+    int x;\
+    for (x=0; x<width; x++) {\
+        const type px= pixels[x];\
+        const int blue = (px&maskb)>>shb;\
+        const int green= (px&maskg)>>shg;\
+        const int red  = (px&maskr)>>shr;\
+        dstU[x]= ((RU)*red + (GU)*green + (BU)*blue + bias)>>(S);\
+        dstV[x]= ((RV)*red + (GV)*green + (BV)*blue + bias)>>(S);\
+    }\
+}\
+static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
+{   /* half-resolution chroma: every output sample is computed from the sum of two neighbouring source pixels */\
+    const type *pixels= (const type*)src;\
+    const int bias= 257<<(S);\
+    int x;\
+    for (x=0; x<width; x++) {\
+        const int p0= pixels[2*x+0];\
+        const int p1= pixels[2*x+1];\
+        const int ga= (p0&(maskg|maska))+(p1&(maskg|maska)); /* green (and alpha) summed on their own, then removed from the pixel sum */\
+        const int rb= p0+p1-ga;\
+        const int blue = (rb&(maskb|(2*maskb)))>>shb;\
+        const int red  = (rb&(maskr|(2*maskr)))>>shr;\
+        const int green= (ga&(maskg|(2*maskg)))>>shg;\
+        dstU[x]= ((RU)*red + (GU)*green + (BU)*blue + bias)>>((S)+1);\
+        dstV[x]= ((RV)*red + (GV)*green + (BV)*blue + bias)>>((S)+1);\
+    }\
+}
+/* Instantiations: same parameter scheme as BGR2Y, plus maska (the alpha bits, 0 for the 15/16-bit formats) which the _half variant groups with green. */
+BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
+BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
+BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
+BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
+BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
+BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
+
+#if HAVE_MMX
+static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat)
+{
+    /* MMX luma for packed 24-bit input. srcFormat only selects the coefficient pair: PIX_FMT_BGR24 loads the bgr24 tables, anything else the rgb24 tables. */
+    if(srcFormat == PIX_FMT_BGR24){
+        __asm__ volatile(
+            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
+            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
+            :
+        );
+    }else{
+        __asm__ volatile(
+            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
+            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
+            :
+        );
+    }
+    /* NOTE(review): mm5/mm6 are loaded above in a separate asm statement and consumed below; nothing tells the compiler they must survive in between -- confirm this is safe with the compilers in use. */
+    __asm__ volatile(
+        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t" /* mm4 = offset added before the final shift */
+        "mov                        %2, %%"REG_a"   \n\t" /* REG_a = -width; it counts up towards zero */
+        "pxor                    %%mm7, %%mm7       \n\t" /* mm7 = 0, used to widen bytes to words */
+        "1:                                         \n\t"
+        PREFETCH"               64(%0)              \n\t"
+        "movd                     (%0), %%mm0       \n\t" /* four overlapping 4-byte loads cover the 12 source bytes of this iteration */
+        "movd                    2(%0), %%mm1       \n\t"
+        "movd                    6(%0), %%mm2       \n\t"
+        "movd                    8(%0), %%mm3       \n\t"
+        "add                       $12, %0          \n\t" /* advance src by 12 bytes (4 pixels of 3 bytes) */
+        "punpcklbw               %%mm7, %%mm0       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm2       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "pmaddwd                 %%mm5, %%mm0       \n\t" /* multiply by the coefficient pair and add adjacent products */
+        "pmaddwd                 %%mm6, %%mm1       \n\t"
+        "pmaddwd                 %%mm5, %%mm2       \n\t"
+        "pmaddwd                 %%mm6, %%mm3       \n\t"
+        "paddd                   %%mm1, %%mm0       \n\t"
+        "paddd                   %%mm3, %%mm2       \n\t"
+        "paddd                   %%mm4, %%mm0       \n\t"
+        "paddd                   %%mm4, %%mm2       \n\t"
+        "psrad                     $15, %%mm0       \n\t" /* drop the 15 fractional bits */
+        "psrad                     $15, %%mm2       \n\t"
+        "packssdw                %%mm2, %%mm0       \n\t"
+        "packuswb                %%mm0, %%mm0       \n\t"
+        "movd                %%mm0, (%1, %%"REG_a") \n\t" /* store 4 luma bytes; %1 points at the end of dst */
+        "add                        $4, %%"REG_a"   \n\t"
+        " js                        1b              \n\t" /* loop while the counter is still negative */
+    : "+r" (src)
+    : "r" (dst+width), "g" (-width)
+    : "%"REG_a
+    );
+}
+
+static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
+{ /* MMX chroma for packed 24-bit input: 4 source pixels per iteration give 4 bytes each for dstU and dstV. srcFormat picks the row of ff_bgr24toUV passed as %4. */
+    __asm__ volatile(
+        "movq                    24+%4, %%mm6       \n\t" /* fourth 8-byte coefficient entry is kept in mm6; the first three are read from memory below */
+        "mov                        %3, %%"REG_a"   \n\t" /* REG_a = -width; it counts up towards zero */
+        "pxor                    %%mm7, %%mm7       \n\t" /* mm7 = 0, used to widen bytes to words */
+        "1:                                         \n\t"
+        PREFETCH"               64(%0)              \n\t"
+        "movd                     (%0), %%mm0       \n\t" /* first half: source bytes 0..5 via two overlapping 4-byte loads */
+        "movd                    2(%0), %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm0       \n\t"
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "movq                    %%mm0, %%mm2       \n\t" /* duplicate so both coefficient sets can be applied */
+        "movq                    %%mm1, %%mm3       \n\t"
+        "pmaddwd                    %4, %%mm0       \n\t"
+        "pmaddwd                  8+%4, %%mm1       \n\t"
+        "pmaddwd                 16+%4, %%mm2       \n\t"
+        "pmaddwd                 %%mm6, %%mm3       \n\t"
+        "paddd                   %%mm1, %%mm0       \n\t" /* mm0 ends up in dstU (entries 0 and 8 of the table) */
+        "paddd                   %%mm3, %%mm2       \n\t" /* mm2 ends up in dstV (entries 16 and 24 of the table) */
+        /* second half: the same computation for source bytes 6..11, results in mm1 (U) and mm4 (V) */
+        "movd                    6(%0), %%mm1       \n\t"
+        "movd                    8(%0), %%mm3       \n\t"
+        "add                       $12, %0          \n\t" /* advance src by 12 bytes (4 pixels of 3 bytes) */
+        "punpcklbw               %%mm7, %%mm1       \n\t"
+        "punpcklbw               %%mm7, %%mm3       \n\t"
+        "movq                    %%mm1, %%mm4       \n\t"
+        "movq                    %%mm3, %%mm5       \n\t"
+        "pmaddwd                    %4, %%mm1       \n\t"
+        "pmaddwd                  8+%4, %%mm3       \n\t"
+        "pmaddwd                 16+%4, %%mm4       \n\t"
+        "pmaddwd                 %%mm6, %%mm5       \n\t"
+        "paddd                   %%mm3, %%mm1       \n\t"
+        "paddd                   %%mm5, %%mm4       \n\t"
+        /* add the common offset, drop the 15 fractional bits, and pack down to bytes */
+        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
+        "paddd                   %%mm3, %%mm0       \n\t"
+        "paddd                   %%mm3, %%mm2       \n\t"
+        "paddd                   %%mm3, %%mm1       \n\t"
+        "paddd                   %%mm3, %%mm4       \n\t"
+        "psrad                     $15, %%mm0       \n\t"
+        "psrad                     $15, %%mm2       \n\t"
+        "psrad                     $15, %%mm1       \n\t"
+        "psrad                     $15, %%mm4       \n\t"
+        "packssdw                %%mm1, %%mm0       \n\t"
+        "packssdw                %%mm4, %%mm2       \n\t"
+        "packuswb                %%mm0, %%mm0       \n\t"
+        "packuswb                %%mm2, %%mm2       \n\t"
+        "movd                %%mm0, (%1, %%"REG_a") \n\t" /* 4 bytes to dstU; %1 points at the end of dstU */
+        "movd                %%mm2, (%2, %%"REG_a") \n\t" /* 4 bytes to dstV; %2 points at the end of dstV */
+        "add                        $4, %%"REG_a"   \n\t"
+        " js                        1b              \n\t" /* loop while the counter is still negative */
+    : "+r" (src)
+    : "r" (dstU+width), "r" (dstV+width), "g" (-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
+    : "%"REG_a
+    );
+}
+#endif
+
+static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+{   /* luma of packed 24-bit pixels stored in B,G,R byte order; 'unused' is ignored */
+#if HAVE_MMX
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
+#else
+    const int bias= 33<<(RGB2YUV_SHIFT-1);
+    int x;
+    for (x=0; x<width; x++) {
+        const uint8_t *px= src + x*3; /* px[0]=B, px[1]=G, px[2]=R */
+        const int blue = px[0];
+        const int green= px[1];
+        const int red  = px[2];
+        dst[x]= (RY*red + GY*green + BY*blue + bias)>>RGB2YUV_SHIFT;
+    }
+#endif /* HAVE_MMX */
+}
+
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+{   /* chroma of packed 24-bit pixels stored in B,G,R byte order, one U/V pair per pixel; only src1 is read */
+#if HAVE_MMX
+    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
+#else
+    const int bias= 257<<(RGB2YUV_SHIFT-1);
+    int x;
+    for (x=0; x<width; x++) {
+        const uint8_t *px= src1 + 3*x; /* px[0]=B, px[1]=G, px[2]=R */
+        const int blue = px[0];
+        const int green= px[1];
+        const int red  = px[2];
+        dstU[x]= (RU*red + GU*green + BU*blue + bias)>>RGB2YUV_SHIFT;
+        dstV[x]= (RV*red + GV*green + BV*blue + bias)>>RGB2YUV_SHIFT;
+    }
+#endif /* HAVE_MMX */
+    assert(src1 == src2); /* checked after the conversion, as before */
+}
+
+static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+{   /* half-resolution chroma: each output sample comes from the channel sums of two adjacent B,G,R pixels */
+    const int bias= 257<<RGB2YUV_SHIFT;
+    int x;
+    for (x=0; x<width; x++) {
+        const uint8_t *pair= src1 + 6*x; /* bytes 0..2 = first pixel, bytes 3..5 = second pixel */
+        const int blue = pair[0] + pair[3];
+        const int green= pair[1] + pair[4];
+        const int red  = pair[2] + pair[5];
+        dstU[x]= (RU*red + GU*green + BU*blue + bias)>>(RGB2YUV_SHIFT+1);
+        dstV[x]= (RV*red + GV*green + BV*blue + bias)>>(RGB2YUV_SHIFT+1);
+    }
+    assert(src1 == src2);
+}
+
+static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+{   /* luma of packed 24-bit pixels stored in R,G,B byte order; 'unused' is ignored */
+#if HAVE_MMX
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
+#else
+    const int bias= 33<<(RGB2YUV_SHIFT-1);
+    int x;
+    for (x=0; x<width; x++) {
+        const uint8_t *px= src + x*3; /* px[0]=R, px[1]=G, px[2]=B */
+        const int red  = px[0];
+        const int green= px[1];
+        const int blue = px[2];
+        dst[x]= (RY*red + GY*green + BY*blue + bias)>>RGB2YUV_SHIFT;
+    }
+#endif
+}
+
+static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+{   /* chroma of packed 24-bit pixels stored in R,G,B byte order, one U/V pair per pixel; only src1 is read */
+#if HAVE_MMX
+    assert(src1==src2);
+    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
+#else
+    const int bias= 257<<(RGB2YUV_SHIFT-1);
+    int x;
+    assert(src1==src2);
+    for (x=0; x<width; x++) {
+        const uint8_t *px= src1 + 3*x; /* px[0]=R, px[1]=G, px[2]=B */
+        const int red  = px[0];
+        const int green= px[1];
+        const int blue = px[2];
+        dstU[x]= (RU*red + GU*green + BU*blue + bias)>>RGB2YUV_SHIFT;
+        dstV[x]= (RV*red + GV*green + BV*blue + bias)>>RGB2YUV_SHIFT;
+    }
+#endif
+}
+
+static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+{   /* half-resolution chroma: each output sample comes from the channel sums of two adjacent R,G,B pixels */
+    const int bias= 257<<RGB2YUV_SHIFT;
+    int x;
+    assert(src1==src2);
+    for (x=0; x<width; x++) {
+        const uint8_t *pair= src1 + 6*x; /* bytes 0..2 = first pixel, bytes 3..5 = second pixel */
+        const int red  = pair[0] + pair[3];
+        const int green= pair[1] + pair[4];
+        const int blue = pair[2] + pair[5];
+        dstU[x]= (RU*red + GU*green + BU*blue + bias)>>(RGB2YUV_SHIFT+1);
+        dstV[x]= (RV*red + GV*green + BV*blue + bias)>>(RGB2YUV_SHIFT+1);
+    }
+}
+
+
+/*
+ * Look up luma for a line of palette indices.
+ *
+ * Each source byte indexes pal; the low 8 bits of the palette entry are
+ * stored as the luma sample.
+ *
+ * dst    output line, width samples
+ * src    input line of palette indices, width bytes
+ * width  number of pixels
+ * pal    palette table indexed by the source byte
+ */
+static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
+{
+    int x;
+    for (x=0; x<width; x++)
+    {
+        const int index= src[x];
+        dst[x]= pal[index] & 0xFF;
+    }
+}
+
+/*
+ * Look up chroma for a line of palette indices.
+ *
+ * Each source byte indexes pal; bits 8..15 of the entry become the U sample
+ * and bits 16..23 the V sample (the stores truncate to 8 bits).
+ *
+ * dstU, dstV  output lines, width samples each
+ * src1        input line of palette indices, width bytes
+ * src2        must be equal to src1 (checked with assert)
+ * width       number of pixels
+ * pal         palette table indexed by the source byte
+ */
+static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
+{
+    int x;
+    assert(src1 == src2);
+    for (x=0; x<width; x++)
+    {
+        const int entry= pal[src1[x]];
+
+        dstU[x]= entry>>8;
+        dstV[x]= entry>>16;
+    }
+}
+
+/*
+ * Expand a 1 bit per pixel line in which a 0 bit means white into 8-bit luma.
+ *
+ * Bits are consumed most significant first; a cleared source bit yields 255
+ * and a set bit yields 0 (the source byte is inverted before testing).
+ * When width is not a multiple of 8, the remaining width&7 pixels are taken
+ * from the high bits of one additional source byte, so no output sample is
+ * left unwritten.
+ *
+ * dst     output line, width samples
+ * src     packed input line, (width+7)/8 bytes are read
+ * width   number of pixels
+ * unused  ignored
+ */
+static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+{
+    int i, j;
+    for (i=0; i<width/8; i++){
+        int d= ~src[i];
+        for(j=0; j<8; j++)
+            dst[8*i+j]= ((d>>(7-j))&1)*255;
+    }
+    // Tail: i now indexes the partially used byte; width>0 guards against negative widths.
+    if (width > 0 && (width&7)){
+        int d= ~src[i];
+        for(j=0; j<(width&7); j++)
+            dst[8*i+j]= ((d>>(7-j))&1)*255;
+    }
+}
+
+/*
+ * Expand a 1 bit per pixel line in which a 0 bit means black into 8-bit luma.
+ *
+ * Bits are consumed most significant first; a set source bit yields 255 and
+ * a cleared bit yields 0.
+ * When width is not a multiple of 8, the remaining width&7 pixels are taken
+ * from the high bits of one additional source byte, so no output sample is
+ * left unwritten.
+ *
+ * dst     output line, width samples
+ * src     packed input line, (width+7)/8 bytes are read
+ * width   number of pixels
+ * unused  ignored
+ */
+static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+{
+    int i, j;
+    for (i=0; i<width/8; i++){
+        int d= src[i];
+        for(j=0; j<8; j++)
+            dst[8*i+j]= ((d>>(7-j))&1)*255;
+    }
+    // Tail: i now indexes the partially used byte; width>0 guards against negative widths.
+    if (width > 0 && (width&7)){
+        int d= src[i];
+        for(j=0; j<(width&7); j++)
+            dst[8*i+j]= ((d>>(7-j))&1)*255;
+    }
+}
+
+/*
+ * Horizontal FIR scaling of one 8-bit line (bilinear / bicubic scaling).
+ *
+ * For every output sample i the plain C path computes
+ *     sum over j<filterSize of src[filterPos[i] + j] * filter[filterSize*i + j]
+ * shifts the sum right by 7 and clamps it to at most (1<<15)-1.
+ *
+ * dst         output line, dstW 16-bit samples
+ * dstW        number of output samples
+ * src         8-bit input line
+ * srcW, xInc  only forwarded to hScale_altivec_real(); the MMX and C paths
+ *             do not read them
+ * filter      filterSize coefficients per output sample, stored consecutively
+ * filterPos   index of the first source sample used for each output sample
+ * filterSize  number of taps; the MMX path asserts it is a positive
+ *             multiple of 4
+ *
+ * The MMX path has specialised loops for filterSize 4 and 8 and a generic loop
+ * for other sizes. All three produce two output samples per iteration (one
+ * 32-bit store of two packed 16-bit values).
+ * NOTE(review): with an odd dstW the MMX loops would therefore write one
+ * sample past dstW -- presumably the buffers are padded; confirm at the caller.
+ */
+static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
+                                  int16_t *filter, int16_t *filterPos, long filterSize)
+{
+#if HAVE_MMX
+    assert(filterSize % 4 == 0 && filterSize>0);
+    if (filterSize==4) // Always true for upscaling, sometimes for down, too.
+    {
+        // filter, filterPos and dst are advanced to the end of their arrays and
+        // addressed with a negative byte counter that is increased by 4 per
+        // iteration; the loop ends when that addition carries.
+        long counter= -2*dstW;
+        filter-= counter*2;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+#if defined(PIC)
+        "push            %%"REG_b"              \n\t"
+#endif
+        "pxor                %%mm7, %%mm7       \n\t"
+        "push           %%"REG_BP"              \n\t" // we use 7 regs here ...
+        "mov             %%"REG_a", %%"REG_BP"  \n\t"
+        ASMALIGN(4)
+        "1:                                     \n\t"
+        "movzwl   (%2, %%"REG_BP"), %%eax       \n\t"
+        "movzwl  2(%2, %%"REG_BP"), %%ebx       \n\t"
+        "movq  (%1, %%"REG_BP", 4), %%mm1       \n\t"
+        "movq 8(%1, %%"REG_BP", 4), %%mm3       \n\t"
+        "movd      (%3, %%"REG_a"), %%mm0       \n\t"
+        "movd      (%3, %%"REG_b"), %%mm2       \n\t"
+        "punpcklbw           %%mm7, %%mm0       \n\t"
+        "punpcklbw           %%mm7, %%mm2       \n\t"
+        "pmaddwd             %%mm1, %%mm0       \n\t"
+        "pmaddwd             %%mm2, %%mm3       \n\t"
+        "movq                %%mm0, %%mm4       \n\t"
+        "punpckldq           %%mm3, %%mm0       \n\t"
+        "punpckhdq           %%mm3, %%mm4       \n\t"
+        "paddd               %%mm4, %%mm0       \n\t"
+        "psrad                  $7, %%mm0       \n\t"
+        "packssdw            %%mm0, %%mm0       \n\t"
+        "movd                %%mm0, (%4, %%"REG_BP")    \n\t"
+        "add                    $4, %%"REG_BP"  \n\t"
+        " jnc                   1b              \n\t"
+
+        "pop            %%"REG_BP"              \n\t"
+#if defined(PIC)
+        "pop             %%"REG_b"              \n\t"
+#endif
+        : "+a" (counter)
+        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
+#if !defined(PIC)
+        : "%"REG_b
+#endif
+        );
+    }
+    else if (filterSize==8)
+    {
+        // Same negative-counter scheme as the 4-tap loop; the filter pointer is
+        // advanced twice as far because each output uses 8 coefficients.
+        long counter= -2*dstW;
+        filter-= counter*4;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+#if defined(PIC)
+        "push             %%"REG_b"             \n\t"
+#endif
+        "pxor                 %%mm7, %%mm7      \n\t"
+        "push            %%"REG_BP"             \n\t" // we use 7 regs here ...
+        "mov              %%"REG_a", %%"REG_BP" \n\t"
+        ASMALIGN(4)
+        "1:                                     \n\t"
+        "movzwl    (%2, %%"REG_BP"), %%eax      \n\t"
+        "movzwl   2(%2, %%"REG_BP"), %%ebx      \n\t"
+        "movq   (%1, %%"REG_BP", 8), %%mm1      \n\t"
+        "movq 16(%1, %%"REG_BP", 8), %%mm3      \n\t"
+        "movd       (%3, %%"REG_a"), %%mm0      \n\t"
+        "movd       (%3, %%"REG_b"), %%mm2      \n\t"
+        "punpcklbw            %%mm7, %%mm0      \n\t"
+        "punpcklbw            %%mm7, %%mm2      \n\t"
+        "pmaddwd              %%mm1, %%mm0      \n\t"
+        "pmaddwd              %%mm2, %%mm3      \n\t"
+
+        "movq  8(%1, %%"REG_BP", 8), %%mm1      \n\t"
+        "movq 24(%1, %%"REG_BP", 8), %%mm5      \n\t"
+        "movd      4(%3, %%"REG_a"), %%mm4      \n\t"
+        "movd      4(%3, %%"REG_b"), %%mm2      \n\t"
+        "punpcklbw            %%mm7, %%mm4      \n\t"
+        "punpcklbw            %%mm7, %%mm2      \n\t"
+        "pmaddwd              %%mm1, %%mm4      \n\t"
+        "pmaddwd              %%mm2, %%mm5      \n\t"
+        "paddd                %%mm4, %%mm0      \n\t"
+        "paddd                %%mm5, %%mm3      \n\t"
+        "movq                 %%mm0, %%mm4      \n\t"
+        "punpckldq            %%mm3, %%mm0      \n\t"
+        "punpckhdq            %%mm3, %%mm4      \n\t"
+        "paddd                %%mm4, %%mm0      \n\t"
+        "psrad                   $7, %%mm0      \n\t"
+        "packssdw             %%mm0, %%mm0      \n\t"
+        "movd                 %%mm0, (%4, %%"REG_BP")   \n\t"
+        "add                     $4, %%"REG_BP" \n\t"
+        " jnc                    1b             \n\t"
+
+        "pop             %%"REG_BP"             \n\t"
+#if defined(PIC)
+        "pop              %%"REG_b"             \n\t"
+#endif
+        : "+a" (counter)
+        : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
+#if !defined(PIC)
+        : "%"REG_b
+#endif
+        );
+    }
+    else
+    {
+        // Generic tap count: the inner loop (label 2) walks the taps four at a
+        // time until the running source pointer reaches offset (src+filterSize),
+        // accumulating two neighbouring outputs; the second output's
+        // coefficients are addressed filterSize*2 bytes after the first's.
+        uint8_t *offset = src+filterSize;
+        long counter= -2*dstW;
+        //filter-= counter*filterSize/2;
+        filterPos-= counter/2;
+        dst-= counter/2;
+        __asm__ volatile(
+        "pxor                  %%mm7, %%mm7     \n\t"
+        ASMALIGN(4)
+        "1:                                     \n\t"
+        "mov                      %2, %%"REG_c" \n\t"
+        "movzwl      (%%"REG_c", %0), %%eax     \n\t"
+        "movzwl     2(%%"REG_c", %0), %%edx     \n\t"
+        "mov                      %5, %%"REG_c" \n\t"
+        "pxor                  %%mm4, %%mm4     \n\t"
+        "pxor                  %%mm5, %%mm5     \n\t"
+        "2:                                     \n\t"
+        "movq                   (%1), %%mm1     \n\t"
+        "movq               (%1, %6), %%mm3     \n\t"
+        "movd (%%"REG_c", %%"REG_a"), %%mm0     \n\t"
+        "movd (%%"REG_c", %%"REG_d"), %%mm2     \n\t"
+        "punpcklbw             %%mm7, %%mm0     \n\t"
+        "punpcklbw             %%mm7, %%mm2     \n\t"
+        "pmaddwd               %%mm1, %%mm0     \n\t"
+        "pmaddwd               %%mm2, %%mm3     \n\t"
+        "paddd                 %%mm3, %%mm5     \n\t"
+        "paddd                 %%mm0, %%mm4     \n\t"
+        "add                      $8, %1        \n\t"
+        "add                      $4, %%"REG_c" \n\t"
+        "cmp                      %4, %%"REG_c" \n\t"
+        " jb                      2b            \n\t"
+        "add                      %6, %1        \n\t"
+        "movq                  %%mm4, %%mm0     \n\t"
+        "punpckldq             %%mm5, %%mm4     \n\t"
+        "punpckhdq             %%mm5, %%mm0     \n\t"
+        "paddd                 %%mm0, %%mm4     \n\t"
+        "psrad                    $7, %%mm4     \n\t"
+        "packssdw              %%mm4, %%mm4     \n\t"
+        "mov                      %3, %%"REG_a" \n\t"
+        "movd                  %%mm4, (%%"REG_a", %0)   \n\t"
+        "add                      $4, %0        \n\t"
+        " jnc                     1b            \n\t"
+
+        : "+r" (counter), "+r" (filter)
+        : "m" (filterPos), "m" (dst), "m"(offset),
+          "m" (src), "r" (filterSize*2)
+        : "%"REG_a, "%"REG_c, "%"REG_d
+        );
+    }
+#else
+#if HAVE_ALTIVEC
+    hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
+#else
+    // Portable reference implementation.
+    int i;
+    for (i=0; i<dstW; i++)
+    {
+        int j;
+        int srcPos= filterPos[i];
+        int val=0;
+        //printf("filterPos: %d\n", filterPos[i]);
+        for (j=0; j<filterSize; j++)
+        {
+            //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
+            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+        }
+        //filter += hFilterSize;
+        // Only the upper bound is clamped here; negative sums are stored as they are.
+        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
+        //dst[i] = val>>7;
+    }
+#endif /* HAVE_ALTIVEC */
+#endif /* HAVE_MMX */
+}
+/*
+ * Horizontally scale one luma (Y) line into a temporary 16-bit buffer.
+ *
+ * Steps, in order:
+ *  1. For the source formats listed below, the line is first converted to
+ *     8-bit luma in formatConvBuffer by the matching *ToY helper and src is
+ *     redirected to that buffer. Any other format is scaled directly from src.
+ *  2. The line is scaled either with hScale() (filter based) or, when
+ *     SWS_FAST_BILINEAR is set, with a fast bilinear path: runtime generated
+ *     MMX2 code (funnyYCode) if canMMX2BeUsed, plain x86 asm otherwise, or a
+ *     portable C loop on other architectures. With HAVE_MMX, hScale() is also
+ *     used whenever canMMX2BeUsed is false.
+ *  3. If c->srcRange differs from c->dstRange and the destination is neither
+ *     RGB nor BGR, the scaled samples are remapped in place.
+ *
+ * c                 scaler context; srcRange, dstRange and dstFormat are read
+ * dst               output line, dstWidth 16-bit samples
+ * src               input line in srcFormat
+ * srcW              input width in pixels
+ * xInc              source step per output sample in 16.16 fixed point
+ *                   (used as xpos>>16 / xpos&0xFFFF in the C path)
+ * flags             scaler flags; only SWS_FAST_BILINEAR is tested here
+ * hLumFilter, hLumFilterPos, hLumFilterSize
+ *                   filter description passed on to hScale()
+ * funnyYCode        address called by the MMX2 path
+ * mmx2Filter, mmx2FilterPos
+ *                   tables loaded into registers for the MMX2 path
+ * pal               passed on to the format conversion helpers
+ */
+static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
+                                   int flags, int canMMX2BeUsed, int16_t *hLumFilter,
+                                   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
+                                   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
+                                   int32_t *mmx2FilterPos, uint32_t *pal)
+{
+    // Step 1: convert the input line to 8-bit luma where needed.
+    if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
+    {
+        RENAME(yuy2ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
+    {
+        RENAME(uyvyToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_RGB32)
+    {
+        RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_RGB32_1)
+    {
+        RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_BGR24)
+    {
+        RENAME(bgr24ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_BGR565)
+    {
+        RENAME(bgr16ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_BGR555)
+    {
+        RENAME(bgr15ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_BGR32)
+    {
+        RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_BGR32_1)
+    {
+        RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_RGB24)
+    {
+        RENAME(rgb24ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_RGB565)
+    {
+        RENAME(rgb16ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_RGB555)
+    {
+        RENAME(rgb15ToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE  || srcFormat==PIX_FMT_RGB4_BYTE)
+    {
+        RENAME(palToY)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_MONOBLACK)
+    {
+        RENAME(monoblack2Y)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+    else if (srcFormat==PIX_FMT_MONOWHITE)
+    {
+        RENAME(monowhite2Y)(formatConvBuffer, src, srcW, pal);
+        src= formatConvBuffer;
+    }
+
+    // Step 2: horizontal scaling.
+#if HAVE_MMX
+    // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
+    if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
+#else
+    if (!(flags&SWS_FAST_BILINEAR))
+#endif
+    {
+        RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
+    }
+    else // fast bilinear upscale / crap downscale
+    {
+#if ARCH_X86
+#if HAVE_MMX2
+        int i;
+#if defined(PIC)
+        // Spill slot used to save and restore the b register around the asm block in PIC builds.
+        uint64_t ebxsave __attribute__((aligned(8)));
+#endif
+        if (canMMX2BeUsed)
+        {
+            // Loads src, dst and the MMX2 tables into registers, then invokes the
+            // code at funnyYCode eight times (FUNNY_Y_CODE), advancing the source
+            // and destination registers after each call.
+            __asm__ volatile(
+#if defined(PIC)
+            "mov               %%"REG_b", %5        \n\t"
+#endif
+            "pxor                  %%mm7, %%mm7     \n\t"
+            "mov                      %0, %%"REG_c" \n\t"
+            "mov                      %1, %%"REG_D" \n\t"
+            "mov                      %2, %%"REG_d" \n\t"
+            "mov                      %3, %%"REG_b" \n\t"
+            "xor               %%"REG_a", %%"REG_a" \n\t" // i
+            PREFETCH"        (%%"REG_c")            \n\t"
+            PREFETCH"      32(%%"REG_c")            \n\t"
+            PREFETCH"      64(%%"REG_c")            \n\t"
+
+#if ARCH_X86_64
+
+#define FUNNY_Y_CODE \
+            "movl            (%%"REG_b"), %%esi     \n\t"\
+            "call                    *%4            \n\t"\
+            "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
+            "add               %%"REG_S", %%"REG_c" \n\t"\
+            "add               %%"REG_a", %%"REG_D" \n\t"\
+            "xor               %%"REG_a", %%"REG_a" \n\t"\
+
+#else
+
+#define FUNNY_Y_CODE \
+            "movl (%%"REG_b"), %%esi        \n\t"\
+            "call         *%4                       \n\t"\
+            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
+            "add               %%"REG_a", %%"REG_D" \n\t"\
+            "xor               %%"REG_a", %%"REG_a" \n\t"\
+
+#endif /* ARCH_X86_64 */
+
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+FUNNY_Y_CODE
+
+#if defined(PIC)
+            "mov                      %5, %%"REG_b" \n\t"
+#endif
+            :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
+            "m" (funnyYCode)
+#if defined(PIC)
+            ,"m" (ebxsave)
+#endif
+            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+#if !defined(PIC)
+            ,"%"REG_b
+#endif
+            );
+            // Overwrite the trailing outputs whose source position is at or past
+            // the last input pixel with that pixel scaled by 128.
+            for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
+        }
+        else
+        {
+#endif /* HAVE_MMX2 */
+        // Integer and fractional halves of the 16.16 step, consumed by the
+        // "adc" / "addw" pair in the loop below.
+        long xInc_shr16 = xInc >> 16;
+        uint16_t xInc_mask = xInc & 0xffff;
+        //NO MMX just normal asm ...
+        // The loop body is unrolled twice: two output samples per iteration.
+        __asm__ volatile(
+        "xor %%"REG_a", %%"REG_a"            \n\t" // i
+        "xor %%"REG_d", %%"REG_d"            \n\t" // xx
+        "xorl    %%ecx, %%ecx                \n\t" // 2*xalpha
+        ASMALIGN(4)
+        "1:                                  \n\t"
+        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
+        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
+        "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
+        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*2*xalpha
+        "shll      $16, %%edi                \n\t"
+        "addl    %%edi, %%esi                \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+        "mov        %1, %%"REG_D"            \n\t"
+        "shrl       $9, %%esi                \n\t"
+        "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
+        "addw       %4, %%cx                 \n\t" //2*xalpha += xInc&0xFFFF
+        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
+
+        "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
+        "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
+        "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
+        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*2*xalpha
+        "shll      $16, %%edi                \n\t"
+        "addl    %%edi, %%esi                \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+        "mov        %1, %%"REG_D"            \n\t"
+        "shrl       $9, %%esi                \n\t"
+        "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t"
+        "addw       %4, %%cx                 \n\t" //2*xalpha += xInc&0xFFFF
+        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
+
+
+        "add        $2, %%"REG_a"            \n\t"
+        "cmp        %2, %%"REG_a"            \n\t"
+        " jb        1b                       \n\t"
+
+
+        :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
+        : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
+        );
+#if HAVE_MMX2
+        } //if MMX2 can't be used
+#endif
+#else
+        // Portable bilinear interpolation: xpos is a 16.16 fixed-point source
+        // position, xalpha its top 7 fractional bits.
+        // NOTE(review): src[xx+1] is read for every output sample, including the
+        // last one -- confirm the input buffer is readable one byte past the
+        // last pixel used.
+        int i;
+        unsigned int xpos=0;
+        for (i=0;i<dstWidth;i++)
+        {
+            register unsigned int xx=xpos>>16;
+            register unsigned int xalpha=(xpos&0xFFFF)>>9;
+            dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
+            xpos+=xInc;
+        }
+#endif /* ARCH_X86 */
+    }
+
+    // Step 3: value range remapping, skipped for RGB/BGR destinations.
+    // When c->srcRange is set the samples are scaled by 14071/16384 plus an
+    // offset; otherwise they are clamped to 30189 and scaled by 19077/16384
+    // minus an offset (presumably full<->limited range conversion -- confirm
+    // against the meaning of srcRange/dstRange).
+    if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
+        int i;
+        //FIXME all pal and rgb srcFormats could do this conversion as well
+        //FIXME all scalers more complex than bilinear could do half of this transform
+        if(c->srcRange){
+            for (i=0; i<dstWidth; i++)
+                dst[i]= (dst[i]*14071 + 33561947)>>14;
+        }else{
+            for (i=0; i<dstWidth; i++)
+                dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
+        }
+    }
+}
+
+/*
+ * Horizontally scale one pair of chroma (U and V) lines into a temporary
+ * 16-bit buffer. U samples are written to dst[0..dstWidth-1] and V samples to
+ * dst[VOFW..VOFW+dstWidth-1].
+ *
+ * Steps, in order:
+ *  1. For packed YUV, RGB/BGR and palette formats the chroma is first
+ *     extracted into formatConvBuffer (U) and formatConvBuffer+VOFW (V) by the
+ *     matching *ToUV helper; the *_half variants are used when
+ *     c->chrSrcHSubSample is non-zero. src1/src2 are then redirected to those
+ *     buffers. Gray and monochrome sources return immediately without writing
+ *     dst. Any other format is scaled directly from src1/src2.
+ *  2. Both lines are scaled either with hScale() or, when SWS_FAST_BILINEAR
+ *     is set, with a fast bilinear path: runtime generated MMX2 code
+ *     (funnyUVCode) if canMMX2BeUsed, plain x86 asm otherwise, or a portable
+ *     C loop on other architectures. With HAVE_MMX, hScale() is also used
+ *     whenever canMMX2BeUsed is false.
+ *  3. If c->srcRange differs from c->dstRange and the destination is neither
+ *     RGB nor BGR, the scaled samples are remapped in place.
+ *
+ * c                 scaler context; chrSrcHSubSample, srcRange, dstRange and
+ *                   dstFormat are read
+ * dst               output buffer holding both chroma lines (see above)
+ * src1, src2        U and V input lines (the same pointer for packed input,
+ *                   as the conversion helpers visible in this file assert)
+ * srcW              input width in chroma samples
+ * xInc              source step per output sample in 16.16 fixed point
+ * flags             scaler flags; only SWS_FAST_BILINEAR is tested here
+ * hChrFilter, hChrFilterPos, hChrFilterSize
+ *                   filter description passed on to hScale()
+ * funnyUVCode       address called by the MMX2 path
+ * mmx2Filter, mmx2FilterPos
+ *                   tables loaded into registers for the MMX2 path
+ * pal               passed on to the format conversion helpers
+ *
+ * NOTE(review): the asm paths address the V half of dst with the byte
+ * displacement VOF while the C code uses the element offset VOFW --
+ * presumably VOF == 2*VOFW; confirm where both are defined.
+ */
+inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
+                                   int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
+                                   int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
+                                   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
+                                   int32_t *mmx2FilterPos, uint32_t *pal)
+{
+    // Step 1: extract 8-bit chroma lines where the input is not planar.
+    if (srcFormat==PIX_FMT_YUYV422)
+    {
+        RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_UYVY422)
+    {
+        RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_RGB32)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_RGB32_1)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
+        else
+            RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_BGR24)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(bgr24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_BGR565)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(bgr16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_BGR555)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(bgr15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_BGR32)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_BGR32_1)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
+        else
+            RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_RGB24)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(rgb24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_RGB565)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(rgb16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (srcFormat==PIX_FMT_RGB555)
+    {
+        if(c->chrSrcHSubSample)
+            RENAME(rgb15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        else
+            RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+    else if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE)
+    {
+        // No chroma to scale for these formats; dst is left untouched.
+        return;
+    }
+    else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE  || srcFormat==PIX_FMT_RGB4_BYTE)
+    {
+        RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
+        src1= formatConvBuffer;
+        src2= formatConvBuffer+VOFW;
+    }
+
+    // Step 2: horizontal scaling of both chroma lines.
+#if HAVE_MMX
+    // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
+    if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
+#else
+    if (!(flags&SWS_FAST_BILINEAR))
+#endif
+    {
+        RENAME(hScale)(dst     , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+    }
+    else // fast bilinear upscale / crap downscale
+    {
+#if ARCH_X86
+#if HAVE_MMX2
+        int i;
+#if defined(PIC)
+        // Spill slot used to save and restore the b register around the asm block in PIC builds.
+        uint64_t ebxsave __attribute__((aligned(8)));
+#endif
+        if (canMMX2BeUsed)
+        {
+            // Invokes the code at funnyUVCode four times for src1 -> dst, then
+            // reloads the registers with src2 and dst+VOF bytes and invokes it
+            // four more times for the second chroma line.
+            __asm__ volatile(
+#if defined(PIC)
+            "mov          %%"REG_b", %6         \n\t"
+#endif
+            "pxor             %%mm7, %%mm7      \n\t"
+            "mov                 %0, %%"REG_c"  \n\t"
+            "mov                 %1, %%"REG_D"  \n\t"
+            "mov                 %2, %%"REG_d"  \n\t"
+            "mov                 %3, %%"REG_b"  \n\t"
+            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+            PREFETCH"   (%%"REG_c")             \n\t"
+            PREFETCH" 32(%%"REG_c")             \n\t"
+            PREFETCH" 64(%%"REG_c")             \n\t"
+
+#if ARCH_X86_64
+
+#define FUNNY_UV_CODE \
+            "movl       (%%"REG_b"), %%esi      \n\t"\
+            "call               *%4             \n\t"\
+            "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
+            "add          %%"REG_S", %%"REG_c"  \n\t"\
+            "add          %%"REG_a", %%"REG_D"  \n\t"\
+            "xor          %%"REG_a", %%"REG_a"  \n\t"\
+
+#else
+
+#define FUNNY_UV_CODE \
+            "movl       (%%"REG_b"), %%esi      \n\t"\
+            "call               *%4             \n\t"\
+            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
+            "add          %%"REG_a", %%"REG_D"  \n\t"\
+            "xor          %%"REG_a", %%"REG_a"  \n\t"\
+
+#endif /* ARCH_X86_64 */
+
+FUNNY_UV_CODE
+FUNNY_UV_CODE
+FUNNY_UV_CODE
+FUNNY_UV_CODE
+            "xor          %%"REG_a", %%"REG_a"  \n\t" // i
+            "mov                 %5, %%"REG_c"  \n\t" // src
+            "mov                 %1, %%"REG_D"  \n\t" // buf1
+            "add              $"AV_STRINGIFY(VOF)", %%"REG_D"  \n\t"
+            PREFETCH"   (%%"REG_c")             \n\t"
+            PREFETCH" 32(%%"REG_c")             \n\t"
+            PREFETCH" 64(%%"REG_c")             \n\t"
+
+FUNNY_UV_CODE
+FUNNY_UV_CODE
+FUNNY_UV_CODE
+FUNNY_UV_CODE
+
+#if defined(PIC)
+            "mov %6, %%"REG_b"    \n\t"
+#endif
+            :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
+            "m" (funnyUVCode), "m" (src2)
+#if defined(PIC)
+            ,"m" (ebxsave)
+#endif
+            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+#if !defined(PIC)
+             ,"%"REG_b
+#endif
+            );
+            // Overwrite the trailing outputs whose source position is at or past
+            // the last input sample with that sample scaled by 128.
+            for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+            {
+                //printf("%d %d %d\n", dstWidth, i, srcW);
+                dst[i] = src1[srcW-1]*128;
+                dst[i+VOFW] = src2[srcW-1]*128;
+            }
+        }
+        else
+        {
+#endif /* HAVE_MMX2 */
+            // Integer and fractional halves of the 16.16 step, consumed by the
+            // "adc" / "addw" pair in the loop below.
+            long xInc_shr16 = (long) (xInc >> 16);
+            uint16_t xInc_mask = xInc & 0xffff;
+            // One U and one V sample are produced per iteration.
+            __asm__ volatile(
+            "xor %%"REG_a", %%"REG_a"               \n\t" // i
+            "xor %%"REG_d", %%"REG_d"               \n\t" // xx
+            "xorl    %%ecx, %%ecx                   \n\t" // 2*xalpha
+            ASMALIGN(4)
+            "1:                                     \n\t"
+            "mov        %0, %%"REG_S"               \n\t"
+            "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
+            "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
+            "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
+            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*2*xalpha
+            "shll      $16, %%edi                   \n\t"
+            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+            "mov        %1, %%"REG_D"               \n\t"
+            "shrl       $9, %%esi                   \n\t"
+            "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
+
+            "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
+            "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
+            "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
+            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*2*xalpha
+            "shll      $16, %%edi                   \n\t"
+            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+            "mov        %1, %%"REG_D"               \n\t"
+            "shrl       $9, %%esi                   \n\t"
+            "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t"
+
+            "addw       %4, %%cx                    \n\t" //2*xalpha += xInc&0xFFFF
+            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>16 + carry
+            "add        $1, %%"REG_a"               \n\t"
+            "cmp        %2, %%"REG_a"               \n\t"
+            " jb        1b                          \n\t"
+
+/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
+   which is needed to support GCC 4.0. */
+#if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+            :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
+#else
+            :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
+#endif
+            "r" (src2)
+            : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
+            );
+#if HAVE_MMX2
+        } //if MMX2 can't be used
+#endif
+#else
+        // Portable bilinear interpolation: xpos is a 16.16 fixed-point source
+        // position, xalpha its top 7 fractional bits; xalpha^127 is the
+        // complementary weight (127 - xalpha).
+        // NOTE(review): src1[xx+1] / src2[xx+1] are read for every output
+        // sample, including the last one -- confirm the input buffers are
+        // readable one byte past the last sample used.
+        int i;
+        unsigned int xpos=0;
+        for (i=0;i<dstWidth;i++)
+        {
+            register unsigned int xx=xpos>>16;
+            register unsigned int xalpha=(xpos&0xFFFF)>>9;
+            dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
+            dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
+            /* slower
+            dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
+            dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
+            */
+            xpos+=xInc;
+        }
+#endif /* ARCH_X86 */
+    }
+    // Step 3: value range remapping, skipped for RGB/BGR destinations.
+    // When c->srcRange is set the samples are scaled by 1799/2048 plus an
+    // offset; otherwise they are clamped to 30775 and scaled by 4663/4096
+    // minus an offset (presumably full<->limited range conversion -- confirm
+    // against the meaning of srcRange/dstRange).
+    if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
+        int i;
+        //FIXME all pal and rgb srcFormats could do this conversion as well
+        //FIXME all scalers more complex than bilinear could do half of this transform
+        if(c->srcRange){
+            for (i=0; i<dstWidth; i++){
+                dst[i     ]= (dst[i     ]*1799 + 4081085)>>11; //1469
+                dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
+            }
+        }else{
+            for (i=0; i<dstWidth; i++){
+                dst[i     ]= (FFMIN(dst[i     ],30775)*4663 - 9289992)>>12; //-264
+                dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
+            }
+        }
+    }
+}
+
+static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[]){
+
+    /* load a few things into local vars to make the code more readable? and faster */
+    const int srcW= c->srcW;
+    const int dstW= c->dstW;
+    const int dstH= c->dstH;
+    const int chrDstW= c->chrDstW;
+    const int chrSrcW= c->chrSrcW;
+    const int lumXInc= c->lumXInc;
+    const int chrXInc= c->chrXInc;
+    const int dstFormat= c->dstFormat;
+    const int srcFormat= c->srcFormat;
+    const int flags= c->flags;
+    const int canMMX2BeUsed= c->canMMX2BeUsed;
+    int16_t *vLumFilterPos= c->vLumFilterPos;
+    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int16_t *hLumFilterPos= c->hLumFilterPos;
+    int16_t *hChrFilterPos= c->hChrFilterPos;
+    int16_t *vLumFilter= c->vLumFilter;
+    int16_t *vChrFilter= c->vChrFilter;
+    int16_t *hLumFilter= c->hLumFilter;
+    int16_t *hChrFilter= c->hChrFilter;
+    int32_t *lumMmxFilter= c->lumMmxFilter;
+    int32_t *chrMmxFilter= c->chrMmxFilter;
+    const int vLumFilterSize= c->vLumFilterSize;
+    const int vChrFilterSize= c->vChrFilterSize;
+    const int hLumFilterSize= c->hLumFilterSize;
+    const int hChrFilterSize= c->hChrFilterSize;
+    int16_t **lumPixBuf= c->lumPixBuf;
+    int16_t **chrPixBuf= c->chrPixBuf;
+    const int vLumBufSize= c->vLumBufSize;
+    const int vChrBufSize= c->vChrBufSize;
+    uint8_t *funnyYCode= c->funnyYCode;
+    uint8_t *funnyUVCode= c->funnyUVCode;
+    uint8_t *formatConvBuffer= c->formatConvBuffer;
+    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
+    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
+    int lastDstY;
+    uint32_t *pal=c->pal_yuv;
+
+    /* vars which will change and which we need to store back in the context */
+    int dstY= c->dstY;
+    int lumBufIndex= c->lumBufIndex;
+    int chrBufIndex= c->chrBufIndex;
+    int lastInLumBuf= c->lastInLumBuf;
+    int lastInChrBuf= c->lastInChrBuf;
+
+    if (isPacked(c->srcFormat)){
+        src[0]=
+        src[1]=
+        src[2]= src[0];
+        srcStride[0]=
+        srcStride[1]=
+        srcStride[2]= srcStride[0];
+    }
+    srcStride[1]<<= c->vChrDrop;
+    srcStride[2]<<= c->vChrDrop;
+
+    //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
+    //       (int)dst[0], (int)dst[1], (int)dst[2]);
+
+#if 0 //self test FIXME move to a vfilter or something
+    {
+    static volatile int i=0;
+    i++;
+    if (srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
+        selfTest(src, srcStride, c->srcW, c->srcH);
+    i--;
+    }
+#endif
+
+    //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
+    //dstStride[0],dstStride[1],dstStride[2]);
+
+    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
+    {
+        static int warnedAlready=0; //FIXME move this into the context perhaps
+        if (flags & SWS_PRINT_INFO && !warnedAlready)
+        {
+            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
+                   "         ->cannot do aligned memory accesses anymore\n");
+            warnedAlready=1;
+        }
+    }
+
+    /* Note the user might start scaling the picture in the middle so this
+       will not get executed. This is not really intended but works
+       currently, so people might do it. */
+    if (srcSliceY ==0){
+        lumBufIndex=0;
+        chrBufIndex=0;
+        dstY=0;
+        lastInLumBuf= -1;
+        lastInChrBuf= -1;
+    }
+
+    lastDstY= dstY;
+
+    for (;dstY < dstH; dstY++){
+        unsigned char *dest =dst[0]+dstStride[0]*dstY;
+        const int chrDstY= dstY>>c->chrDstVSubSample;
+        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
+        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
+
+        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
+        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
+        const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
+        const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
+
+        //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
+        // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize,  c->chrSrcVSubSample);
+        //handle holes (FAST_BILINEAR & weird filters)
+        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
+        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
+        //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
+        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
+        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
+
+        // Do we have enough lines in this slice to output the dstY line
+        if (lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
+        {
+            //Do horizontal scaling
+            while(lastInLumBuf < lastLumSrcY)
+            {
+                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
+                lumBufIndex++;
+                //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf,  lastLumSrcY);
+                assert(lumBufIndex < 2*vLumBufSize);
+                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
+                assert(lastInLumBuf + 1 - srcSliceY >= 0);
+                //printf("%d %d\n", lumBufIndex, vLumBufSize);
+                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
+                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
+                                funnyYCode, c->srcFormat, formatConvBuffer,
+                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
+                lastInLumBuf++;
+            }
+            while(lastInChrBuf < lastChrSrcY)
+            {
+                uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
+                uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
+                chrBufIndex++;
+                assert(chrBufIndex < 2*vChrBufSize);
+                assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
+                assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
+                //FIXME replace parameters through context struct (some at least)
+
+                if (!(isGray(srcFormat) || isGray(dstFormat)))
+                    RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
+                                    flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
+                                    funnyUVCode, c->srcFormat, formatConvBuffer,
+                                    c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
+                lastInChrBuf++;
+            }
+            //wrap buf index around to stay inside the ring buffer
+            if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
+            if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
+        }
+        else // not enough lines left in this slice -> load the rest in the buffer
+        {
+            /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
+            firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
+            lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
+            vChrBufSize, vLumBufSize);*/
+
+            //Do horizontal scaling
+            while(lastInLumBuf+1 < srcSliceY + srcSliceH)
+            {
+                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
+                lumBufIndex++;
+                assert(lumBufIndex < 2*vLumBufSize);
+                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
+                assert(lastInLumBuf + 1 - srcSliceY >= 0);
+                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
+                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
+                                funnyYCode, c->srcFormat, formatConvBuffer,
+                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
+                lastInLumBuf++;
+            }
+            while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
+            {
+                uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
+                uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
+                chrBufIndex++;
+                assert(chrBufIndex < 2*vChrBufSize);
+                assert(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH);
+                assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
+
+                if (!(isGray(srcFormat) || isGray(dstFormat)))
+                    RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
+                            flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
+                            funnyUVCode, c->srcFormat, formatConvBuffer,
+                            c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
+                lastInChrBuf++;
+            }
+            //wrap buf index around to stay inside the ring buffer
+            if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
+            if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
+            break; //we can't output a dstY line so let's try with the next slice
+        }
+
+#if HAVE_MMX
+        c->blueDither= ff_dither8[dstY&1];
+        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
+            c->greenDither= ff_dither8[dstY&1];
+        else
+            c->greenDither= ff_dither4[dstY&1];
+        c->redDither= ff_dither8[(dstY+1)&1];
+#endif
+        if (dstY < dstH-2)
+        {
+            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+#if HAVE_MMX
+            int i;
+        if (flags & SWS_ACCURATE_RND){
+            int s= APCK_SIZE / 8;
+            for (i=0; i<vLumFilterSize; i+=2){
+                *(void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
+                *(void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
+                          lumMmxFilter[s*i+APCK_COEF/4  ]=
+                          lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
+                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
+            }
+            for (i=0; i<vChrFilterSize; i+=2){
+                *(void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
+                *(void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                          chrMmxFilter[s*i+APCK_COEF/4  ]=
+                          chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
+                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
+            }
+        }else{
+            for (i=0; i<vLumFilterSize; i++)
+            {
+                lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
+                lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
+                lumMmxFilter[4*i+2]=
+                lumMmxFilter[4*i+3]=
+                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
+            }
+            for (i=0; i<vChrFilterSize; i++)
+            {
+                chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
+                chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
+                chrMmxFilter[4*i+2]=
+                chrMmxFilter[4*i+3]=
+                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+            }
+        }
+#endif
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                RENAME(yuv2nv12X)(c,
+                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                    dest, uDest, dstW, chrDstW, dstFormat);
+            }
+            else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 like
+            {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
+                {
+                    int16_t *lumBuf = lumPixBuf[0];
+                    int16_t *chrBuf= chrPixBuf[0];
+                    RENAME(yuv2yuv1)(c, lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
+                }
+                else //General YV12
+                {
+                    RENAME(yuv2yuvX)(c,
+                        vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                        dest, uDest, vDest, dstW, chrDstW);
+                }
+            }
+            else
+            {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB
+                {
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    if(flags & SWS_FULL_CHR_H_INT){
+                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
+                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                            dest, dstW, dstY);
+                    }else{
+                        RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
+                            dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                    }
+                }
+                else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
+                {
+                    int lumAlpha= vLumFilter[2*dstY+1];
+                    int chrAlpha= vChrFilter[2*dstY+1];
+                    lumMmxFilter[2]=
+                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
+                    chrMmxFilter[2]=
+                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
+                    if(flags & SWS_FULL_CHR_H_INT){
+                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
+                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                            dest, dstW, dstY);
+                    }else{
+                        RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
+                            dest, dstW, lumAlpha, chrAlpha, dstY);
+                    }
+                }
+                else //general RGB
+                {
+                    if(flags & SWS_FULL_CHR_H_INT){
+                        yuv2rgbXinC_full(c,
+                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                            dest, dstW, dstY);
+                    }else{
+                        RENAME(yuv2packedX)(c,
+                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                            dest, dstW, dstY);
+                    }
+                }
+            }
+        }
+        else // hmm looks like we can't use MMX here without overwriting this array's tail
+        {
+            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
+                yuv2nv12XinC(
+                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                    dest, uDest, dstW, chrDstW, dstFormat);
+            }
+            else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12
+            {
+                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                yuv2yuvXinC(
+                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                    dest, uDest, vDest, dstW, chrDstW);
+            }
+            else
+            {
+                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
+                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
+                if(flags & SWS_FULL_CHR_H_INT){
+                    yuv2rgbXinC_full(c,
+                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                        dest, dstW, dstY);
+                }else{
+                    yuv2packedXinC(c,
+                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
+                        dest, dstW, dstY);
+                }
+            }
+        }
+    }
+
+#if HAVE_MMX
+    __asm__ volatile(SFENCE:::"memory");
+    __asm__ volatile(EMMS:::"memory");
+#endif
+    /* store changed local vars back in the context */
+    c->dstY= dstY;
+    c->lumBufIndex= lumBufIndex;
+    c->chrBufIndex= chrBufIndex;
+    c->lastInLumBuf= lastInLumBuf;
+    c->lastInChrBuf= lastInChrBuf;
+
+    return dstY - lastDstY;
+}
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
new file mode 100644
index 0000000000..65af412c2c
--- /dev/null
+++ b/libswscale/yuv2rgb.c
@@ -0,0 +1,684 @@
+/*
+ * software YUV to RGB converter
+ *
+ * Copyright (C) 2009 Konstantin Shishkov
+ *
+ * MMX/MMX2 template stuff (needed for fast movntq support),
+ * 1,4,8bpp support and context / deglobalize stuff
+ * by Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include "config.h"
+#include "rgb2rgb.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+#define DITHER1XBPP // only for MMX
+
+extern const uint8_t dither_8x8_32[8][8];
+extern const uint8_t dither_8x8_73[8][8];
+extern const uint8_t dither_8x8_220[8][8];
+
+#if HAVE_MMX && CONFIG_GPL
+
+/* hope these constant values are cache line aligned */
+DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw)   = 0x00ff00ff00ff00ffULL;
+DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
+DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
+
+// MMX versions: the template is included with HAVE_MMX2 and HAVE_AMD3DNOW forced to 0 and names suffixed _MMX
+#undef RENAME
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _MMX
+#include "yuv2rgb_template.c"
+
+// MMX2 versions: same template again with HAVE_MMX2 forced to 1 and names suffixed _MMX2 (HAVE_MMX2 is not restored before the #endif)
+#undef RENAME
+#undef HAVE_MMX2
+#define HAVE_MMX2 1
+#define RENAME(a) a ## _MMX2
+#include "yuv2rgb_template.c"
+
+#endif /* HAVE_MMX && CONFIG_GPL */
+
+const int32_t ff_yuv2rgb_coeffs[8][4] = { /* 8 rows of 4 integer coefficients; each row is labelled with its colorspace */
+    {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
+    {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
+    {104597, 132201, 25675, 53279}, /* unspecified */
+    {104597, 132201, 25675, 53279}, /* reserved */
+    {104448, 132798, 24759, 53109}, /* FCC */
+    {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
+    {104597, 132201, 25675, 53279}, /* SMPTE 170M */
+    {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
+}; /* NOTE(review): a row is presumably what gets passed as inv_table[] to sws_yuv2rgb_c_init_tables() -- confirm at the caller */
+
+#define LOADCHROMA(i)                               /* expands to 5 statements; needs U, V, r, g, b, pu, pv, c in scope */ \
+    U = pu[i];                                      \
+    V = pv[i];                                      \
+    r = (void *)c->table_rV[V];                     /* r table is selected by V alone */ \
+    g = (void *)(c->table_gU[U] + c->table_gV[V]);  /* g table: table_gU[U] plus table_gV[V] */ \
+    b = (void *)c->table_bU[U];                     /* b table is selected by U alone */
+
+#define PUTRGB(dst,src,i,o)          /* convert luma pair i; o is accepted for existing callers but no longer reorders pixels */ \
+    Y = src[2*i];                    \
+    dst[2*i  ] = r[Y] + g[Y] + b[Y]; /* pixel value is the sum of the three per-channel table entries */ \
+    Y = src[2*i+1];                  \
+    dst[2*i+1] = r[Y] + g[Y] + b[Y];
+
+#define PUTRGB24(dst,src,i)                                  /* convert luma pair i to two 3-byte pixels stored in r,g,b order */ \
+    Y = src[2*i];                                            \
+    dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
+    Y = src[2*i+1];                                          \
+    dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y]; /* needs Y, r, g, b in scope (see LOADCHROMA) */
+
+#define PUTBGR24(dst,src,i)                                  /* like PUTRGB24 but the 3 bytes are stored in b,g,r order */ \
+    Y = src[2*i];                                            \
+    dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
+    Y = src[2*i+1];                                          \
+    dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y]; /* needs Y, r, g, b in scope (see LOADCHROMA) */
+
+#define YUV2RGBFUNC(func_name, dst_type) /* opens a converter function and its loops; the invoking code supplies the loop body */ \
+static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
+                     int srcSliceH, uint8_t* dst[], int dstStride[]){\
+    int y;\
+\
+    if (c->srcFormat == PIX_FMT_YUV422P) { /* doubled stride combined with (y>>1) below selects chroma row y for even y */ \
+        srcStride[1] *= 2; /* note: this writes into the caller's srcStride array */ \
+        srcStride[2] *= 2;\
+    }\
+    for (y=0; y<srcSliceH; y+=2) { /* two luma/output rows per iteration, sharing one chroma row */ \
+        dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY  )*dstStride[0]); /* dst rows are offset by srcSliceY, src rows are not */ \
+        dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
+        dst_type av_unused *r, *b;\
+        dst_type *g;\
+        uint8_t *py_1 = src[0] + y*srcStride[0];\
+        uint8_t *py_2 = py_1 + srcStride[0];\
+        uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
+        uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
+        unsigned int h_size = c->dstW>>3; /* number of full 8-pixel groups in a row */ \
+        while (h_size--) {\
+            int av_unused U, V;\
+            int Y;\
+
+#define ENDYUV2RGBLINE(dst_delta) /* advances by one 8-pixel group, closes that loop, opens the (dstW & 4) tail block */ \
+            pu += 4;\
+            pv += 4;\
+            py_1 += 8;\
+            py_2 += 8;\
+            dst_1 += dst_delta; /* dst_delta is in units of dst_type elements */ \
+            dst_2 += dst_delta;\
+        }\
+        if (c->dstW & 4) {\
+            int av_unused Y, U, V;\
+
+#define ENDYUV2RGBFUNC() /* closes the tail block, the row loop and the function */ \
+        }\
+    }\
+    return srcSliceH;\
+}
+
+#define CLOSEYUV2RGBFUNC(dst_delta) /* both closers back to back: the (dstW & 4) tail block is left empty */ \
+    ENDYUV2RGBLINE(dst_delta)\
+    ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_32, uint32_t)
+    /* main loop body: 8 pixels on each of two rows, one chroma pair per 2x2 block */
+    LOADCHROMA(0);
+    PUTRGB(dst_1,py_1,0,0);
+    PUTRGB(dst_2,py_2,0,1);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2,py_2,1,1);
+    PUTRGB(dst_1,py_1,1,0);
+
+    LOADCHROMA(2);
+    PUTRGB(dst_1,py_1,2,0);
+    PUTRGB(dst_2,py_2,2,1);
+
+    LOADCHROMA(3);
+    PUTRGB(dst_2,py_2,3,1);
+    PUTRGB(dst_1,py_1,3,0);
+ENDYUV2RGBLINE(8)
+    /* tail, entered when dstW & 4: four more pixels on each row */
+    /* (the pointers were already advanced past the last full group of 8) */
+    LOADCHROMA(0);
+    PUTRGB(dst_1,py_1,0,0);
+    PUTRGB(dst_2,py_2,0,1);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2,py_2,1,1);
+    PUTRGB(dst_1,py_1,1,0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t) /* 3 bytes per pixel via PUTRGB24 (stored r,g,b) */
+    LOADCHROMA(0);
+    PUTRGB24(dst_1,py_1,0);
+    PUTRGB24(dst_2,py_2,0);
+
+    LOADCHROMA(1);
+    PUTRGB24(dst_2,py_2,1);
+    PUTRGB24(dst_1,py_1,1);
+
+    LOADCHROMA(2);
+    PUTRGB24(dst_1,py_1,2);
+    PUTRGB24(dst_2,py_2,2);
+
+    LOADCHROMA(3);
+    PUTRGB24(dst_2,py_2,3);
+    PUTRGB24(dst_1,py_1,3);
+ENDYUV2RGBLINE(24) /* 8 pixels * 3 bytes; the statements below form the dstW & 4 tail */
+    LOADCHROMA(0);
+    PUTRGB24(dst_1,py_1,0);
+    PUTRGB24(dst_2,py_2,0);
+
+    LOADCHROMA(1);
+    PUTRGB24(dst_2,py_2,1);
+    PUTRGB24(dst_1,py_1,1);
+ENDYUV2RGBFUNC()
+
+// Same structure as yuv2rgb_c_24_rgb, but uses PUTBGR24 so each pixel is stored b,g,r
+YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t)
+    LOADCHROMA(0);
+    PUTBGR24(dst_1,py_1,0);
+    PUTBGR24(dst_2,py_2,0);
+
+    LOADCHROMA(1);
+    PUTBGR24(dst_2,py_2,1);
+    PUTBGR24(dst_1,py_1,1);
+
+    LOADCHROMA(2);
+    PUTBGR24(dst_1,py_1,2);
+    PUTBGR24(dst_2,py_2,2);
+
+    LOADCHROMA(3);
+    PUTBGR24(dst_2,py_2,3);
+    PUTBGR24(dst_1,py_1,3);
+ENDYUV2RGBLINE(24) /* 8 pixels * 3 bytes; the statements below form the dstW & 4 tail */
+    LOADCHROMA(0);
+    PUTBGR24(dst_1,py_1,0);
+    PUTBGR24(dst_2,py_2,0);
+
+    LOADCHROMA(1);
+    PUTBGR24(dst_2,py_2,1);
+    PUTBGR24(dst_1,py_1,1);
+ENDYUV2RGBFUNC()
+
+// Same 8-pixel loop body as yuv2rgb_c_32, instantiated with uint16_t for
+// r, g, b, dst_1, dst_2; closed with CLOSEYUV2RGBFUNC, so the dstW & 4 tail block is empty
+YUV2RGBFUNC(yuv2rgb_c_16, uint16_t)
+    LOADCHROMA(0);
+    PUTRGB(dst_1,py_1,0,0);
+    PUTRGB(dst_2,py_2,0,1);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2,py_2,1,1);
+    PUTRGB(dst_1,py_1,1,0);
+
+    LOADCHROMA(2);
+    PUTRGB(dst_1,py_1,2,0);
+    PUTRGB(dst_2,py_2,2,1);
+
+    LOADCHROMA(3);
+    PUTRGB(dst_2,py_2,3,1);
+    PUTRGB(dst_1,py_1,3,0);
+CLOSEYUV2RGBFUNC(8)
+
+// Same 8-pixel loop body as yuv2rgb_c_32, instantiated with uint8_t for
+// r, g, b, dst_1, dst_2; closed with CLOSEYUV2RGBFUNC, so the dstW & 4 tail block is empty
+YUV2RGBFUNC(yuv2rgb_c_8, uint8_t)
+    LOADCHROMA(0);
+    PUTRGB(dst_1,py_1,0,0);
+    PUTRGB(dst_2,py_2,0,1);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2,py_2,1,1);
+    PUTRGB(dst_1,py_1,1,0);
+
+    LOADCHROMA(2);
+    PUTRGB(dst_1,py_1,2,0);
+    PUTRGB(dst_2,py_2,2,1);
+
+    LOADCHROMA(3);
+    PUTRGB(dst_2,py_2,3,1);
+    PUTRGB(dst_1,py_1,3,0);
+CLOSEYUV2RGBFUNC(8)
+
+// One byte per pixel with ordered dither: r and g lookups are offset by d32[], b lookups by d64[]
+YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t)
+    const uint8_t *d32 = dither_8x8_32[y&7]; // row y&7 of the 8x8 matrix; offsets of 8 and above index into the following row
+    const uint8_t *d64 = dither_8x8_73[y&7]; // y is always even here (the row loop steps by 2), so a following row exists
+#define PUTRGB8(dst,src,i,o)                                    \
+    Y = src[2*i];                                               \
+    dst[2*i]   = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
+    Y = src[2*i+1];                                             \
+    dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
+
+    LOADCHROMA(0);
+    PUTRGB8(dst_1,py_1,0,0);
+    PUTRGB8(dst_2,py_2,0,0+8);
+
+    LOADCHROMA(1);
+    PUTRGB8(dst_2,py_2,1,2+8);
+    PUTRGB8(dst_1,py_1,1,2);
+
+    LOADCHROMA(2);
+    PUTRGB8(dst_1,py_1,2,4);
+    PUTRGB8(dst_2,py_2,2,4+8);
+
+    LOADCHROMA(3);
+    PUTRGB8(dst_2,py_2,3,6+8);
+    PUTRGB8(dst_1,py_1,3,6);
+CLOSEYUV2RGBFUNC(8)
+
+
+// Packed output, two pixels per byte: PUTRGB4 ORs the second pixel's value, shifted left by 4,
+// into the first pixel's value and stores one byte at dst[i]; 8 pixels therefore advance dst by 4
+YUV2RGBFUNC(yuv2rgb_c_4, uint8_t)
+    int acc;
+#define PUTRGB4(dst,src,i)          \
+    Y = src[2*i];                   \
+    acc = r[Y] + g[Y] + b[Y];       \
+    Y = src[2*i+1];                 \
+    acc |= (r[Y] + g[Y] + b[Y])<<4; \
+    dst[i] = acc;
+
+    LOADCHROMA(0);
+    PUTRGB4(dst_1,py_1,0);
+    PUTRGB4(dst_2,py_2,0);
+
+    LOADCHROMA(1);
+    PUTRGB4(dst_2,py_2,1);
+    PUTRGB4(dst_1,py_1,1);
+
+    LOADCHROMA(2);
+    PUTRGB4(dst_1,py_1,2);
+    PUTRGB4(dst_2,py_2,2);
+
+    LOADCHROMA(3);
+    PUTRGB4(dst_2,py_2,3);
+    PUTRGB4(dst_1,py_1,3);
+CLOSEYUV2RGBFUNC(4)
+
+YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t) /* two pixels packed per byte, with ordered dither */
+    const uint8_t *d64 =  dither_8x8_73[y&7];  // offsets the g lookups; offsets of 8 and above index into the following matrix row
+    const uint8_t *d128 = dither_8x8_220[y&7]; // offsets the r and b lookups
+    int acc;
+
+#define PUTRGB4D(dst,src,i,o)                                     \
+    Y = src[2*i];                                                 \
+    acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]];        \
+    Y = src[2*i+1];                                               \
+    acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4;  \
+    dst[i]= acc;
+
+    LOADCHROMA(0);
+    PUTRGB4D(dst_1,py_1,0,0);
+    PUTRGB4D(dst_2,py_2,0,0+8);
+
+    LOADCHROMA(1);
+    PUTRGB4D(dst_2,py_2,1,2+8);
+    PUTRGB4D(dst_1,py_1,1,2);
+
+    LOADCHROMA(2);
+    PUTRGB4D(dst_1,py_1,2,4);
+    PUTRGB4D(dst_2,py_2,2,4+8);
+
+    LOADCHROMA(3);
+    PUTRGB4D(dst_2,py_2,3,6+8);
+    PUTRGB4D(dst_1,py_1,3,6);
+CLOSEYUV2RGBFUNC(4) /* 8 pixels -> 4 bytes; the dstW & 4 tail block is empty */
+
+// Same 8-pixel loop body as yuv2rgb_c_32 with uint8_t for r, g, b, dst_1, dst_2: one byte
+// is written per pixel; closed with CLOSEYUV2RGBFUNC, so the dstW & 4 tail block is empty
+YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t)
+    LOADCHROMA(0);
+    PUTRGB(dst_1,py_1,0,0);
+    PUTRGB(dst_2,py_2,0,1);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2,py_2,1,1);
+    PUTRGB(dst_1,py_1,1,0);
+
+    LOADCHROMA(2);
+    PUTRGB(dst_1,py_1,2,0);
+    PUTRGB(dst_2,py_2,2,1);
+
+    LOADCHROMA(3);
+    PUTRGB(dst_2,py_2,3,1);
+    PUTRGB(dst_1,py_1,3,0);
+CLOSEYUV2RGBFUNC(8)
+
+YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t) /* one byte per pixel, with ordered dither */
+    const uint8_t *d64 =  dither_8x8_73[y&7];  // offsets the g lookups; offsets of 8 and above index into the following matrix row
+    const uint8_t *d128 = dither_8x8_220[y&7]; // offsets the r and b lookups
+
+#define PUTRGB4DB(dst,src,i,o)                                    \
+    Y = src[2*i];                                                 \
+    dst[2*i]   = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
+    Y = src[2*i+1];                                               \
+    dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
+
+    LOADCHROMA(0);
+    PUTRGB4DB(dst_1,py_1,0,0);
+    PUTRGB4DB(dst_2,py_2,0,0+8);
+
+    LOADCHROMA(1);
+    PUTRGB4DB(dst_2,py_2,1,2+8);
+    PUTRGB4DB(dst_1,py_1,1,2);
+
+    LOADCHROMA(2);
+    PUTRGB4DB(dst_1,py_1,2,4);
+    PUTRGB4DB(dst_2,py_2,2,4+8);
+
+    LOADCHROMA(3);
+    PUTRGB4DB(dst_2,py_2,3,6+8);
+    PUTRGB4DB(dst_1,py_1,3,6);
+CLOSEYUV2RGBFUNC(8) /* the dstW & 4 tail block is empty */
+
+YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t) /* 8 pixels are accumulated into one output byte per row */
+        const uint8_t *d128 = dither_8x8_220[y&7];
+        char out_1 = 0, out_2 = 0; // per-row accumulators, re-initialised for every group of 8 pixels
+        g= c->table_gU[128] + c->table_gV[128]; // fixed table for U = V = 128: pu/pv are never read here
+
+#define PUTRGB1(out,src,i,o)    \
+    Y = src[2*i];               \
+    out+= out + g[Y+d128[0+o]]; /* out = 2*out + table value: shifts the previous pixels up by one bit */ \
+    Y = src[2*i+1];             \
+    out+= out + g[Y+d128[1+o]];
+
+    PUTRGB1(out_1,py_1,0,0);
+    PUTRGB1(out_2,py_2,0,0+8);
+
+    PUTRGB1(out_2,py_2,1,2+8);
+    PUTRGB1(out_1,py_1,1,2);
+
+    PUTRGB1(out_1,py_1,2,4);
+    PUTRGB1(out_2,py_2,2,4+8);
+
+    PUTRGB1(out_2,py_2,3,6+8);
+    PUTRGB1(out_1,py_1,3,6);
+
+    dst_1[0]= out_1;
+    dst_2[0]= out_2;
+CLOSEYUV2RGBFUNC(1) /* 8 pixels -> 1 byte; the dstW & 4 tail block is empty */
+
+SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c) /* selects a yuv->rgb converter from c->flags and c->dstFormat */
+{
+    SwsFunc t = NULL;
+#if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL
+    if (c->flags & SWS_CPU_CAPS_MMX2) { /* returns immediately for the four listed formats, otherwise falls through */
+        switch (c->dstFormat) {
+        case PIX_FMT_RGB32:  return yuv420_rgb32_MMX2;
+        case PIX_FMT_BGR24:  return yuv420_rgb24_MMX2;
+        case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
+        case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
+        }
+    }
+    if (c->flags & SWS_CPU_CAPS_MMX) {
+        switch (c->dstFormat) {
+        case PIX_FMT_RGB32:  return yuv420_rgb32_MMX;
+        case PIX_FMT_BGR24:  return yuv420_rgb24_MMX;
+        case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
+        case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
+        }
+    }
+#endif
+#if HAVE_VIS
+    t = sws_yuv2rgb_init_vis(c); /* each enabled backend below assigns t, replacing any earlier value */
+#endif
+#if CONFIG_MLIB
+    t = sws_yuv2rgb_init_mlib(c);
+#endif
+#if HAVE_ALTIVEC && CONFIG_GPL
+    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+        t = sws_yuv2rgb_init_altivec(c);
+#endif
+
+#if ARCH_BFIN
+    if (c->flags & SWS_CPU_CAPS_BFIN)
+        t = sws_ff_bfin_yuv2rgb_get_func_ptr(c);
+#endif
+
+    if (t)
+        return t;
+
+    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n"); /* logged on every fallback to the C converters */
+
+    switch (c->dstFormat) {
+    case PIX_FMT_BGR32_1:
+    case PIX_FMT_RGB32_1:
+    case PIX_FMT_BGR32:
+    case PIX_FMT_RGB32:      return yuv2rgb_c_32;
+    case PIX_FMT_RGB24:      return yuv2rgb_c_24_rgb;
+    case PIX_FMT_BGR24:      return yuv2rgb_c_24_bgr;
+    case PIX_FMT_RGB565:
+    case PIX_FMT_BGR565:
+    case PIX_FMT_RGB555:
+    case PIX_FMT_BGR555:     return yuv2rgb_c_16;
+    case PIX_FMT_RGB8:
+    case PIX_FMT_BGR8:       return yuv2rgb_c_8_ordered_dither;
+    case PIX_FMT_RGB4:
+    case PIX_FMT_BGR4:       return yuv2rgb_c_4_ordered_dither;
+    case PIX_FMT_RGB4_BYTE:
+    case PIX_FMT_BGR4_BYTE:  return yuv2rgb_c_4b_ordered_dither;
+    case PIX_FMT_MONOBLACK:  return yuv2rgb_c_1_ordered_dither;
+    default:
+        assert(0); /* unsupported destination format */
+    }
+    return NULL; /* reached only when the assert above is compiled out */
+}
+
+static void fill_table(uint8_t* table[256], const int elemsize, const int inc, uint8_t *y_table)
+{
+    /* Fill table[] with 256 pointers into y_table; the bias cancels at index 128 ((128*inc)>>16 equals inc>>9). */
+    uint8_t *const origin = y_table - elemsize * (inc >> 9);
+    int64_t acc = 0; /* idx * inc; its upper bits (acc >> 16) give the element offset */
+    int idx = 0;
+
+    while (idx < 256) {
+        table[idx++] = origin + elemsize * (acc >> 16);
+        acc += inc;
+    }
+}
+
+static void fill_gv_table(int table[256], const int elemsize, const int inc)
+{
+    const int bias = -(inc >> 9); /* cancels the accumulated term at index 128 */
+    int64_t acc = 0;              /* idx * inc; only acc >> 16 is used */
+    int idx = 0;
+
+    while (idx < 256) {           /* table[idx] is an offset scaled by elemsize, not a pointer */
+        table[idx++] = elemsize * (bias + (acc >> 16));
+        acc += inc;
+    }
+}
+
+/* Build the lookup tables c->yuvTable and c->table_rV/gU/bU/gV for the bit depth of
+ * c->dstFormat from the coefficients in inv_table, after applying brightness, contrast and
+ * saturation. Frees the previous c->yuvTable. Returns 0, or -1 for an unsupported depth. */
+av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
+                                      int brightness, int contrast, int saturation)
+{
+    const int isRgb =      c->dstFormat==PIX_FMT_RGB32
+                        || c->dstFormat==PIX_FMT_RGB32_1
+                        || c->dstFormat==PIX_FMT_BGR24
+                        || c->dstFormat==PIX_FMT_RGB565
+                        || c->dstFormat==PIX_FMT_RGB555
+                        || c->dstFormat==PIX_FMT_RGB8
+                        || c->dstFormat==PIX_FMT_RGB4
+                        || c->dstFormat==PIX_FMT_RGB4_BYTE
+                        || c->dstFormat==PIX_FMT_MONOBLACK;
+    const int bpp = fmt_depth(c->dstFormat);
+    uint8_t *y_table;
+    uint16_t *y_table16;
+    uint32_t *y_table32;
+    int i, base, rbase, gbase, bbase, abase;
+    const int yoffs = fullRange ? 384 : 326; /* element offset of the table base handed to fill_table() */
+    int64_t crv =  inv_table[0];
+    int64_t cbu =  inv_table[1];
+    int64_t cgu = -inv_table[2];
+    int64_t cgv = -inv_table[3];
+    int64_t cy  = 1<<16;
+    int64_t oy  = 0;
+    int64_t yb  = 0;
+
+    if (!fullRange) {
+        cy = (cy*255) / 219;
+        oy = 16<<16;
+    } else {
+        crv = (crv*224) / 255;
+        cbu = (cbu*224) / 255;
+        cgu = (cgu*224) / 255;
+        cgv = (cgv*224) / 255;
+    }
+
+    cy  = (cy *contrast             ) >> 16;
+    crv = (crv*contrast * saturation) >> 32;
+    cbu = (cbu*contrast * saturation) >> 32;
+    cgu = (cgu*contrast * saturation) >> 32;
+    cgv = (cgv*contrast * saturation) >> 32;
+    oy -= 256*brightness;
+
+    //scale coefficients by cy
+    crv = ((crv << 16) + 0x8000) / cy;
+    cbu = ((cbu << 16) + 0x8000) / cy;
+    cgu = ((cgu << 16) + 0x8000) / cy;
+    cgv = ((cgv << 16) + 0x8000) / cy;
+
+    av_free(c->yuvTable);
+    switch (bpp) {
+    case 1: /* only table_gU and table_gV are filled for this depth */
+        c->yuvTable = av_malloc(1024);
+        y_table = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024-110; i++) {
+            y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
+            yb += cy;
+        }
+        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
+        fill_gv_table(c->table_gV, 1, cgv);
+        break;
+    case 4:
+    case 4|128:
+        rbase = isRgb ? 3 : 0;
+        gbase = 1;
+        bbase = isRgb ? 0 : 3;
+        c->yuvTable = av_malloc(1024*3);
+        y_table = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024-110; i++) {
+            int yval = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table[i+110     ] =  (yval >> 7)       << rbase;
+            y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
+            y_table[i+110+2048] =  (yval >> 7)       << bbase;
+            yb += cy;
+        }
+        fill_table(c->table_rV, 1, crv, y_table + yoffs);
+        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
+        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
+        fill_gv_table(c->table_gV, 1, cgv);
+        break;
+    case 8:
+        rbase = isRgb ? 5 : 0;
+        gbase = isRgb ? 2 : 3;
+        bbase = isRgb ? 0 : 6;
+        c->yuvTable = av_malloc(1024*3);
+        y_table = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024-38; i++) {
+            int yval = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table[i+16     ] = ((yval + 18) / 36) << rbase;
+            y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
+            y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
+            yb += cy;
+        }
+        fill_table(c->table_rV, 1, crv, y_table + yoffs);
+        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
+        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
+        fill_gv_table(c->table_gV, 1, cgv);
+        break;
+    case 15:
+    case 16:
+        rbase = isRgb ? bpp - 5 : 0;
+        gbase = 5;
+        bbase = isRgb ? 0 : (bpp - 5);
+        c->yuvTable = av_malloc(1024*3*2);
+        y_table16 = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024; i++) {
+            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table16[i     ] = (yval >> 3)          << rbase;
+            y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
+            y_table16[i+2048] = (yval >> 3)          << bbase;
+            yb += cy;
+        }
+        /* fill_table() takes a byte pointer; the element size is passed separately */
+        fill_table(c->table_rV, 2, crv, (uint8_t *)(y_table16 + yoffs));
+        fill_table(c->table_gU, 2, cgu, (uint8_t *)(y_table16 + yoffs + 1024));
+        fill_table(c->table_bU, 2, cbu, (uint8_t *)(y_table16 + yoffs + 2048)); fill_gv_table(c->table_gV, 2, cgv);
+        break;
+    case 24:
+        c->yuvTable = av_malloc(1024);
+        y_table = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024; i++) {
+            y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
+            yb += cy;
+        }
+        fill_table(c->table_rV, 1, crv, y_table + yoffs);
+        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
+        fill_table(c->table_bU, 1, cbu, y_table + yoffs);
+        fill_gv_table(c->table_gV, 1, cgv);
+        break;
+    case 32:
+        base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
+        rbase = base + (isRgb ? 16 : 0);
+        gbase = base + 8;
+        bbase = base + (isRgb ? 0 : 16);
+        abase = (base + 24) & 31;
+        c->yuvTable = av_malloc(1024*3*4);
+        y_table32 = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024; i++) {
+            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
+            /* shift as unsigned: rbase/bbase/abase can be 24, which would overflow a signed int */
+            y_table32[i     ] = ((uint32_t)yval << rbase) + (255u << abase);
+            y_table32[i+1024] = (uint32_t)yval << gbase;
+            y_table32[i+2048] = (uint32_t)yval << bbase; yb += cy;
+        }
+        fill_table(c->table_rV, 4, crv, (uint8_t *)(y_table32 + yoffs));
+        fill_table(c->table_gU, 4, cgu, (uint8_t *)(y_table32 + yoffs + 1024));
+        fill_table(c->table_bU, 4, cbu, (uint8_t *)(y_table32 + yoffs + 2048));
+        fill_gv_table(c->table_gV, 4, cgv);
+        break;
+    default:
+        c->yuvTable = NULL;
+        av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
+        return -1;
+    }
+    return 0;
+}
diff --git a/libswscale/yuv2rgb_altivec.c b/libswscale/yuv2rgb_altivec.c
new file mode 100644
index 0000000000..b3a87a0360
--- /dev/null
+++ b/libswscale/yuv2rgb_altivec.c
@@ -0,0 +1,962 @@
+/*
+ * AltiVec acceleration for colorspace conversion
+ *
+ * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+Convert I420 YV12 to RGB in various formats,
+  it rejects images that are not in 420 formats,
+  it rejects images that don't have widths of multiples of 16,
+  it rejects images that don't have heights of multiples of 2.
+Reject defers to C simulation code.
+
+Lots of optimizations to be done here.
+
+1. Need to fix saturation code. I just couldn't get it to fly with packs
+   and adds, so we currently use max/min to clip.
+
+2. The inefficient use of chroma loading needs a bit of brushing up.
+
+3. Analysis of pipeline stalls needs to be done. Use shark to identify
+   pipeline stalls.
+
+
+MODIFIED to calculate coeffs from currently selected color space.
+MODIFIED core to be a macro where you specify the output format.
+ADDED UYVY conversion which is never called due to some thing in swscale.
+CORRECTED algorithm selection to be strict on input formats.
+ADDED runtime detection of AltiVec.
+
+ADDED altivec_yuv2packedX vertical scl + RGB converter
+
+March 27,2004
+PERFORMANCE ANALYSIS
+
+The C version uses 25% of the processor or ~250Mips for D1 video rawvideo
+used as test.
+The AltiVec version uses 10% of the processor or ~100Mips for D1 video
+same sequence.
+
+720 * 480 * 30  ~10MPS
+
+so we have roughly 10 clocks per pixel. This is too high, something has
+to be wrong.
+
+OPTIMIZED clip codes to utilize vec_max and vec_packs removing the
+need for vec_min.
+
+OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to have
+the input video frame, it was just decompressed so it probably resides in L1
+caches. However, we are creating the output video stream. This needs to use the
+DSTST instruction to optimize for the cache. We couple this with the fact that
+we are not going to be visiting the input buffer again so we mark it Least
+Recently Used. This shaves 25% of the processor cycles off.
+
+Now memcpy is the largest mips consumer in the system, probably due
+to the inefficient X11 stuff.
+
+GL libraries seem to be very slow on this machine 1.33Ghz PB running
+Jaguar, this is not the case for my 1Ghz PB.  I thought it might be
+a versioning issue, however I have libGL.1.2.dylib for both
+machines. (We need to figure this out now.)
+
+GL2 libraries work now with patch for RGB32.
+
+NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor.
+
+Integrated luma prescaling adjustment for saturation/contrast/brightness
+adjustment.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+#include "config.h"
+#include "rgb2rgb.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+#undef PROFILE_THE_BEAST  /* NOTE(review): whatever these two switches guard lies outside this part of the file */
+#undef INC_SCALING
+
+typedef unsigned char ubyte;  /* used below for the raw input plane pointers */
+typedef signed char   sbyte;
+
+
+/* RGB interleaver, 16 planar pels 8-bit samples per channel in
+   homogeneous vector registers x0,x1,x2 are interleaved with the
+   following technique:
+
+      o0 = vec_mergeh (x0,x1);
+      o1 = vec_perm (o0, x2, perm_rgb_0);
+      o2 = vec_perm (o0, x2, perm_rgb_1);
+      o3 = vec_mergel (x0,x1);
+      o4 = vec_perm (o3,o2,perm_rgb_2);
+      o5 = vec_perm (o3,o2,perm_rgb_3);
+
+  perm_rgb_0:   o0(RG).h v1(B) --> o1*
+              0   1  2   3   4
+             rgbr|gbrg|brgb|rgbr
+             0010 0100 1001 0010
+             0102 3145 2673 894A
+
+  perm_rgb_1:   o0(RG).h v1(B) --> o2
+              0   1  2   3   4
+             gbrg|brgb|bbbb|bbbb
+             0100 1001 1111 1111
+             B5CD 6EF7 89AB CDEF
+
+  perm_rgb_2:   o3(RG).l o2(rgbB.l) --> o4*
+              0   1  2   3   4
+             gbrg|brgb|rgbr|gbrg
+             1111 1111 0010 0100
+             89AB CDEF 0182 3945
+
+  perm_rgb_3:   o3(RG).l o2(rgbB.l) ---> o5*
+              0   1  2   3   4
+             brgb|rgbr|gbrg|brgb
+             1001 0010 0100 1001
+             a67b 89cA BdCD eEFf
+
+*/
+static
+const vector unsigned char  /* vec_perm selectors used by vec_merge3; 0x00-0x0f pick from the 1st operand, 0x10-0x1f from the 2nd */
+  perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
+                0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
+  perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
+                0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
+  perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
+                0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
+  perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
+                0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
+
+#define vec_merge3(x2,x1,x0,y0,y1,y2)       /* interleave three 16-byte planes into y0,y1,y2 */ \
+do {                                        /* NB: the parameters are listed x2,x1,x0 */         \
+    __typeof__(x0) o0,o2,o3;                \
+        o0 = vec_mergeh (x0,x1);            /* x0[0],x1[0],x0[1],x1[1],... from the first 8 bytes of each */ \
+        y0 = vec_perm (o0, x2, perm_rgb_0); /* x0[i],x1[i],x2[i] triplets: first 16 output bytes */ \
+        o2 = vec_perm (o0, x2, perm_rgb_1); /* leftovers of o0 plus bytes 8..15 of x2 */ \
+        o3 = vec_mergel (x0,x1);            /* same pairing for the last 8 bytes of x0,x1 */ \
+        y1 = vec_perm (o3,o2,perm_rgb_2);   \
+        y2 = vec_perm (o3,o2,perm_rgb_3);   \
+} while(0)
+
+#define vec_mstbgr24(x0,x1,x2,ptr)      /* store 16 pixels as x2,x1,x0 byte triplets */ \
+do {                                    /* ptr must be an lvalue; it is post-incremented 3 times */ \
+    __typeof__(x0) _0,_1,_2;            \
+    vec_merge3 (x0,x1,x2,_0,_1,_2);     /* vec_merge3 names its first parameter x2 */ \
+    vec_st (_0, 0, ptr++);              \
+    vec_st (_1, 0, ptr++);              \
+    vec_st (_2, 0, ptr++);              \
+}  while (0) /* no trailing ';': the caller supplies it, keeping the macro a single statement */
+
+#define vec_mstrgb24(x0,x1,x2,ptr)      /* store 16 pixels as x0,x1,x2 byte triplets */ \
+do {                                    /* ptr must be an lvalue; it is post-incremented 3 times */ \
+    __typeof__(x0) _0,_1,_2;            \
+    vec_merge3 (x2,x1,x0,_0,_1,_2);     \
+    vec_st (_0, 0, ptr++);              \
+    vec_st (_1, 0, ptr++);              \
+    vec_st (_2, 0, ptr++);              \
+}  while (0) /* no trailing ';': the caller supplies it, keeping the macro a single statement */
+
+/* pack the pixels in rgb0 format
+   msb R
+   lsb 0
+*/
+#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)  /* store 16 pixels as x0,x1,x2,x3 byte quadruplets */ \
+do {                                     /* ptr must be an lvalue; it is advanced by 4 at the end */ \
+    T _0,_1,_2,_3;                                                            \
+    _0 = vec_mergeh (x0,x1);             /* byte pairs x0[i],x1[i] for i = 0..7 */ \
+    _1 = vec_mergeh (x2,x3);             /* byte pairs x2[i],x3[i] for i = 0..7 */ \
+    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
+    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
+    vec_st (_2, 0*16, (T *)ptr);                                              \
+    vec_st (_3, 1*16, (T *)ptr);                                              \
+    _0 = vec_mergel (x0,x1);             /* same again for i = 8..15 */ \
+    _1 = vec_mergel (x2,x3);                                                  \
+    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
+    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
+    vec_st (_2, 2*16, (T *)ptr);                                              \
+    vec_st (_3, 3*16, (T *)ptr);                                              \
+    ptr += 4;                                                                 \
+}  while (0) /* no trailing ';': the caller supplies it, keeping the macro a single statement */
+
+/*
+
+  | 1     0       1.4021   | | Y |
+  | 1    -0.3441 -0.7142   |x| Cb|
+  | 1     1.7718  0        | | Cr|
+
+
+  Y:      [-128 127]
+  Cb/Cr : [-128 127]
+
+  Typical YUV conversion works on Y: 0-255; this version has been optimized for JPEG decode.
+
+*/
+
+
+
+
+#define vec_unh(x) /* pair a zero byte with each of bytes 0..7 of x, giving eight 16-bit lanes */ \
+    (vector signed short) \
+        vec_perm(x,(__typeof__(x)){0}, /* second operand is all zero; index 0x10 selects its byte 0 */ \
+                 ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
+                                         0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07}))
+#define vec_unl(x) /* pair a zero byte with each of bytes 8..15 of x, giving eight 16-bit lanes */ \
+    (vector signed short) \
+        vec_perm(x,(__typeof__(x)){0}, /* second operand is all zero; index 0x10 selects its byte 0 */ \
+                 ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
+                                         0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F}))
+
+#define vec_clip_s16(x) /* clamp every 16-bit lane of x to the range [16, 235] */ \
+    vec_max (vec_min (x, ((vector signed short){235,235,235,235,235,235,235,235})), \
+                         ((vector signed short){ 16, 16, 16, 16, 16, 16, 16, 16}))
+
+#define vec_packclp(x,y) /* raise negative lanes of x and y to 0, then vec_packs both into one byte vector */ \
+    (vector unsigned char)vec_packs \
+        ((vector unsigned short)vec_max (x,((vector signed short) {0})), \
+         (vector unsigned short)vec_max (y,((vector signed short) {0})))
+
+//#define out_pixels(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,a,a,ptr)
+
+
+/* Convert vectors of 16-bit Y, U, V samples to R, G, B using the coefficient vectors in c.
+ * The luma term is vec_mradds(Y, CY, OY), 128 is subtracted from both chroma inputs, and
+ * each colour is the luma term plus its vec_mradds-scaled chroma contribution(s).
+ * Results are written through R, G and B. */
+static inline void cvtyuvtoRGB (SwsContext *c,
+                                vector signed short Y, vector signed short U, vector signed short V,
+                                vector signed short *R, vector signed short *G, vector signed short *B)
+{
+    vector signed short bias, luma, cb, cr;
+
+    bias = vec_splat((vector signed short){128},0);
+    luma = vec_mradds (Y, c->CY, c->OY);
+    cb   = vec_sub (U, bias);
+    cr   = vec_sub (V, bias);
+
+    /* blue:  luma + (CBU*(cb<<CSHIFT)+0x4000)>>15 */
+    *B = vec_mradds (vec_sl (cb, c->CSHIFT), c->CBU, luma);
+
+    /* red:   luma + (CRV*(cr<<CSHIFT)+0x4000)>>15 */
+    *R = vec_mradds (vec_sl (cr, c->CSHIFT), c->CRV, luma);
+
+    /* green: luma + (CGU*cb+0x4000)>>15 + (CGV*cr+0x4000)>>15 */
+    *G = vec_mradds (cr, c->CGV, vec_mradds (cb, c->CGU, luma));
+}
+
+
+/*
+  ------------------------------------------------------------------------------
+  CS converters
+  ------------------------------------------------------------------------------
+*/
+
+
+#define DEFCSP420_CVT(name,out_pixels)  /* defines altivec_<name>() */   \
+static int altivec_##name (SwsContext *c,                               \
+                           unsigned char **in, int *instrides,          \
+                           int srcSliceY,        int srcSliceH,         \
+                           unsigned char **oplanes, int *outstrides)    \
+{                                                                       \
+    int w = c->srcW;                                                    \
+    int h = srcSliceH;                                                  \
+    int i,j;                                                            \
+    int instrides_scl[3];                                               \
+    vector unsigned char y0,y1;                                         \
+    /* y0/y1: 16 luma bytes of the even/odd input row */                \
+    vector signed char  u,v;                                            \
+                                                                        \
+    vector signed short Y0,Y1,Y2,Y3;                                    \
+    vector signed short U,V;                                            \
+    vector signed short vx,ux,uvx;                                      \
+    vector signed short vx0,ux0,uvx0;                                   \
+    vector signed short vx1,ux1,uvx1;                                   \
+    vector signed short R0,G0,B0;                                       \
+    vector signed short R1,G1,B1;                                       \
+    vector unsigned char R,G,B;                                         \
+                                                                        \
+    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                  \
+    vector unsigned char align_perm;                                    \
+    /* local copies of the coefficient vectors held in the context */   \
+    vector signed short                                                 \
+        lCY  = c->CY,                                                   \
+        lOY  = c->OY,                                                   \
+        lCRV = c->CRV,                                                  \
+        lCBU = c->CBU,                                                  \
+        lCGU = c->CGU,                                                  \
+        lCGV = c->CGV;                                                  \
+                                                                        \
+    vector unsigned short lCSHIFT = c->CSHIFT;                          \
+    /* two luma rows are read per iteration; they share one chroma row */ \
+    ubyte *y1i   = in[0];                                               \
+    ubyte *y2i   = in[0]+instrides[0];                                  \
+    ubyte *ui    = in[1];                                               \
+    ubyte *vi    = in[2];                                               \
+    /* oute/outo: output row srcSliceY and the row after it */          \
+    vector unsigned char *oute                                          \
+        = (vector unsigned char *)                                      \
+            (oplanes[0]+srcSliceY*outstrides[0]);                       \
+    vector unsigned char *outo                                          \
+        = (vector unsigned char *)                                      \
+            (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);         \
+                                                                        \
+    /* per-row-pair pointer corrections applied after the inner loop */ \
+    instrides_scl[0] = instrides[0]*2-w;  /* the loop moves y{1,2}i by w */ \
+    instrides_scl[1] = instrides[1]-w/2;  /* the loop moves ui by w/2 */    \
+    instrides_scl[2] = instrides[2]-w/2;  /* the loop moves vi by w/2 */    \
+                                                                        \
+                                                                        \
+    for (i=0;i<h/2;i++) {                                               \
+        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);          \
+        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);          \
+        /* 16 pixels of both rows per pass; a remainder of w%16 pixels is not converted */ \
+        for (j=0;j<w/16;j++) {                                          \
+                                                                        \
+            y1ivP = (vector unsigned char *)y1i;                        \
+            y2ivP = (vector unsigned char *)y2i;                        \
+            uivP  = (vector unsigned char *)ui;                         \
+            vivP  = (vector unsigned char *)vi;                         \
+            /* unaligned loads: vec_lvsl + vec_perm over two consecutive vectors */ \
+            align_perm = vec_lvsl (0, y1i);                             \
+            y0 = (vector unsigned char)                                 \
+                 vec_perm (y1ivP[0], y1ivP[1], align_perm);             \
+                                                                        \
+            align_perm = vec_lvsl (0, y2i);                             \
+            y1 = (vector unsigned char)                                 \
+                 vec_perm (y2ivP[0], y2ivP[1], align_perm);             \
+                                                                        \
+            align_perm = vec_lvsl (0, ui);                              \
+            u = (vector signed char)                                    \
+                vec_perm (uivP[0], uivP[1], align_perm);                \
+                                                                        \
+            align_perm = vec_lvsl (0, vi);                              \
+            v = (vector signed char)                                    \
+                vec_perm (vivP[0], vivP[1], align_perm);                \
+            /* subtract the splatted constant 128 from every chroma byte */ \
+            u  = (vector signed char)                                   \
+                 vec_sub (u,(vector signed char)                        \
+                          vec_splat((vector signed char){128},0));      \
+            v  = (vector signed char)                                   \
+                 vec_sub (v,(vector signed char)                        \
+                          vec_splat((vector signed char){128},0));      \
+            /* only the first 8 chroma bytes are widened and used */    \
+            U  = vec_unpackh (u);                                       \
+            V  = vec_unpackh (v);                                       \
+                                                                        \
+            /* Y0,Y1: even row pixels 0-7, 8-15; Y2,Y3: odd row */      \
+            Y0 = vec_unh (y0);                                          \
+            Y1 = vec_unl (y0);                                          \
+            Y2 = vec_unh (y1);                                          \
+            Y3 = vec_unl (y1);                                          \
+                                                                        \
+            Y0 = vec_mradds (Y0, lCY, lOY);                             \
+            Y1 = vec_mradds (Y1, lCY, lOY);                             \
+            Y2 = vec_mradds (Y2, lCY, lOY);                             \
+            Y3 = vec_mradds (Y3, lCY, lOY);                             \
+                                                                        \
+            /*   ux  = (CBU*(u<<CSHIFT)+0x4000)>>15 */                  \
+            ux = vec_sl (U, lCSHIFT);                                   \
+            ux = vec_mradds (ux, lCBU, (vector signed short){0});       \
+            ux0  = vec_mergeh (ux,ux);                                  \
+            ux1  = vec_mergel (ux,ux);                                  \
+                                                                        \
+            /* vx  = (CRV*(v<<CSHIFT)+0x4000)>>15;        */            \
+            vx = vec_sl (V, lCSHIFT);                                   \
+            vx = vec_mradds (vx, lCRV, (vector signed short){0});       \
+            vx0  = vec_mergeh (vx,vx);                                  \
+            vx1  = vec_mergel (vx,vx);                                  \
+                                                                        \
+            /* uvx = ((CGU*u) + (CGV*v))>>15 */                         \
+            uvx = vec_mradds (U, lCGU, (vector signed short){0});       \
+            uvx = vec_mradds (V, lCGV, uvx);                            \
+            uvx0 = vec_mergeh (uvx,uvx);                                \
+            uvx1 = vec_mergel (uvx,uvx);                                \
+            /* merging a vector with itself duplicates each chroma term for 2 adjacent pixels */ \
+            R0 = vec_add (Y0,vx0);                                      \
+            G0 = vec_add (Y0,uvx0);                                     \
+            B0 = vec_add (Y0,ux0);                                      \
+            R1 = vec_add (Y1,vx1);                                      \
+            G1 = vec_add (Y1,uvx1);                                     \
+            B1 = vec_add (Y1,ux1);                                      \
+                                                                        \
+            R  = vec_packclp (R0,R1);                                   \
+            G  = vec_packclp (G0,G1);                                   \
+            B  = vec_packclp (B0,B1);                                   \
+                                                                        \
+            out_pixels(R,G,B,oute);                                     \
+            /* odd row: same chroma terms, luma from Y2/Y3 */           \
+            R0 = vec_add (Y2,vx0);                                      \
+            G0 = vec_add (Y2,uvx0);                                     \
+            B0 = vec_add (Y2,ux0);                                      \
+            R1 = vec_add (Y3,vx1);                                      \
+            G1 = vec_add (Y3,uvx1);                                     \
+            B1 = vec_add (Y3,ux1);                                      \
+            R  = vec_packclp (R0,R1);                                   \
+            G  = vec_packclp (G0,G1);                                   \
+            B  = vec_packclp (B0,B1);                                   \
+                                                                        \
+                                                                        \
+            out_pixels(R,G,B,outo);                                     \
+                                                                        \
+            y1i  += 16;                                                 \
+            y2i  += 16;                                                 \
+            ui   += 8;                                                  \
+            vi   += 8;                                                  \
+                                                                        \
+        }                                                               \
+        /* NOTE(review): adds one stride (in 16-byte units) on top of what out_pixels already advanced; confirm this assumes an unpadded output row */ \
+        outo  += (outstrides[0])>>4;                                    \
+        oute  += (outstrides[0])>>4;                                    \
+                                                                        \
+        ui    += instrides_scl[1];                                      \
+        vi    += instrides_scl[2];                                      \
+        y1i   += instrides_scl[0];                                      \
+        y2i   += instrides_scl[0];                                      \
+    }                                                                   \
+    return srcSliceH;                                                   \
+}
+
+
+#define out_abgr(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),c,b,a,ptr) /* per-pixel bytes: alpha,c,b,a */
+#define out_bgra(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a)){255}),ptr) /* per-pixel bytes: c,b,a,alpha */
+#define out_rgba(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a)){255}),ptr) /* per-pixel bytes: a,b,c,alpha */
+#define out_argb(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,b,c,ptr) /* per-pixel bytes: alpha,a,b,c. NOTE(review): {255} names only element 0; confirm what the other 15 alpha bytes become */
+#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr) /* per-pixel bytes: a,b,c */
+#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr) /* per-pixel bytes: c,b,a */
+
+DEFCSP420_CVT (yuv2_abgr, out_abgr)
+#if 1
+DEFCSP420_CVT (yuv2_bgra, out_bgra)
+#else
+static int altivec_yuv2_bgra32 (SwsContext *c,
+                                unsigned char **in, int *instrides,
+                                int srcSliceY,        int srcSliceH,
+                                unsigned char **oplanes, int *outstrides)
+{
+    int w = c->srcW;
+    int h = srcSliceH;
+    int i,j;
+    int instrides_scl[3];
+    vector unsigned char y0,y1;
+
+    vector signed char  u,v;
+
+    vector signed short Y0,Y1,Y2,Y3;
+    vector signed short U,V;
+    vector signed short vx,ux,uvx;
+    vector signed short vx0,ux0,uvx0;
+    vector signed short vx1,ux1,uvx1;
+    vector signed short R0,G0,B0;
+    vector signed short R1,G1,B1;
+    vector unsigned char R,G,B;
+
+    vector unsigned char *uivP, *vivP;
+    vector unsigned char align_perm;
+
+    vector signed short
+        lCY  = c->CY,
+        lOY  = c->OY,
+        lCRV = c->CRV,
+        lCBU = c->CBU,
+        lCGU = c->CGU,
+        lCGV = c->CGV;
+
+    vector unsigned short lCSHIFT = c->CSHIFT;
+
+    ubyte *y1i   = in[0];
+    ubyte *y2i   = in[0]+w;
+    ubyte *ui    = in[1];
+    ubyte *vi    = in[2];
+
+    vector unsigned char *oute
+        = (vector unsigned char *)
+          (oplanes[0]+srcSliceY*outstrides[0]);
+    vector unsigned char *outo
+        = (vector unsigned char *)
+          (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
+
+
+    instrides_scl[0] = instrides[0];
+    instrides_scl[1] = instrides[1]-w/2;  /* the loop moves ui by w/2 */
+    instrides_scl[2] = instrides[2]-w/2;  /* the loop moves vi by w/2 */
+
+
+    for (i=0;i<h/2;i++) {
+        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
+        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
+
+        for (j=0;j<w/16;j++) {
+
+            y0 = vec_ldl (0,y1i);
+            y1 = vec_ldl (0,y2i);
+            uivP = (vector unsigned char *)ui;
+            vivP = (vector unsigned char *)vi;
+
+            align_perm = vec_lvsl (0, ui);
+            u  = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
+
+            align_perm = vec_lvsl (0, vi);
+            v  = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
+            u  = (vector signed char)
+                 vec_sub (u,(vector signed char)
+                          vec_splat((vector signed char){128},0));
+
+            v  = (vector signed char)
+                 vec_sub (v, (vector signed char)
+                          vec_splat((vector signed char){128},0));
+
+            U  = vec_unpackh (u);
+            V  = vec_unpackh (v);
+
+
+            Y0 = vec_unh (y0);
+            Y1 = vec_unl (y0);
+            Y2 = vec_unh (y1);
+            Y3 = vec_unl (y1);
+
+            Y0 = vec_mradds (Y0, lCY, lOY);
+            Y1 = vec_mradds (Y1, lCY, lOY);
+            Y2 = vec_mradds (Y2, lCY, lOY);
+            Y3 = vec_mradds (Y3, lCY, lOY);
+
+            /*   ux  = (CBU*(u<<CSHIFT)+0x4000)>>15 */
+            ux = vec_sl (U, lCSHIFT);
+            ux = vec_mradds (ux, lCBU, (vector signed short){0});
+            ux0  = vec_mergeh (ux,ux);
+            ux1  = vec_mergel (ux,ux);
+
+            /* vx  = (CRV*(v<<CSHIFT)+0x4000)>>15;        */
+            vx = vec_sl (V, lCSHIFT);
+            vx = vec_mradds (vx, lCRV, (vector signed short){0});
+            vx0  = vec_mergeh (vx,vx);
+            vx1  = vec_mergel (vx,vx);
+            /* uvx = ((CGU*u) + (CGV*v))>>15 */
+            uvx = vec_mradds (U, lCGU, (vector signed short){0});
+            uvx = vec_mradds (V, lCGV, uvx);
+            uvx0 = vec_mergeh (uvx,uvx);
+            uvx1 = vec_mergel (uvx,uvx);
+            R0 = vec_add (Y0,vx0);
+            G0 = vec_add (Y0,uvx0);
+            B0 = vec_add (Y0,ux0);
+            R1 = vec_add (Y1,vx1);
+            G1 = vec_add (Y1,uvx1);
+            B1 = vec_add (Y1,ux1);
+            R  = vec_packclp (R0,R1);
+            G  = vec_packclp (G0,G1);
+            B  = vec_packclp (B0,B1);
+
+            out_argb(R,G,B,oute);
+            R0 = vec_add (Y2,vx0);
+            G0 = vec_add (Y2,uvx0);
+            B0 = vec_add (Y2,ux0);
+            R1 = vec_add (Y3,vx1);
+            G1 = vec_add (Y3,uvx1);
+            B1 = vec_add (Y3,ux1);
+            R  = vec_packclp (R0,R1);
+            G  = vec_packclp (G0,G1);
+            B  = vec_packclp (B0,B1);
+
+            out_argb(R,G,B,outo);
+            y1i  += 16;
+            y2i  += 16;
+            ui   += 8;
+            vi   += 8;
+
+        }
+
+        outo  += (outstrides[0])>>4;
+        oute  += (outstrides[0])>>4;
+
+        ui    += instrides_scl[1];
+        vi    += instrides_scl[2];
+        y1i   += instrides_scl[0];
+        y2i   += instrides_scl[0];
+    }
+    return srcSliceH;
+}
+
+#endif
+
+
+/*
+ * Instantiate planar-YUV -> packed-RGB slice converters, one per output
+ * layout.  The init code in this file refers to the results as
+ * altivec_yuv2_rgba, altivec_yuv2_argb, altivec_yuv2_rgb24 and
+ * altivec_yuv2_bgr24; the second argument names the pixel store macro.
+ * NOTE(review): DEFCSP420_CVT itself is defined earlier in the file --
+ * confirm the exact expansion there.
+ */
+DEFCSP420_CVT (yuv2_rgba, out_rgba)
+DEFCSP420_CVT (yuv2_argb, out_argb)
+DEFCSP420_CVT (yuv2_rgb24,  out_rgb24)
+DEFCSP420_CVT (yuv2_bgr24,  out_bgr24)
+
+
+/*
+ * vec_perm() selectors that split one vector of packed UYVY bytes
+ *
+ *     u y v y | u y v y | u y v y | u y v y
+ *     0 1 2 3   4 5 6 7   8 9 a b   c d e f
+ *
+ * into eight 16-bit lanes.  Every lane is the byte pair (0x10, n): index
+ * 0x10 picks byte 0 of the second vec_perm() operand (an all-zero vector
+ * at the call sites) as the high byte and n picks the sample as the low
+ * byte.  U and V selectors repeat each chroma sample for both pixels of
+ * its pair.
+ */
+static const vector unsigned char demux_u = {
+    0x10,0x00, 0x10,0x00,
+    0x10,0x04, 0x10,0x04,
+    0x10,0x08, 0x10,0x08,
+    0x10,0x0c, 0x10,0x0c
+};
+
+static const vector unsigned char demux_v = {
+    0x10,0x02, 0x10,0x02,
+    0x10,0x06, 0x10,0x06,
+    0x10,0x0A, 0x10,0x0A,
+    0x10,0x0E, 0x10,0x0E
+};
+
+static const vector unsigned char demux_y = {
+    0x10,0x01, 0x10,0x03,
+    0x10,0x05, 0x10,0x07,
+    0x10,0x09, 0x10,0x0B,
+    0x10,0x0D, 0x10,0x0F
+};
+
+/*
+ * Packed UYVY 4:2:2 -> 32-bit RGB slice converter (written so that live
+ * CCIR raw video can be played).
+ *
+ * Every inner iteration reads 32 source bytes (two vectors of 8 pixels),
+ * converts them with cvtyuvtoRGB() and stores 16 pixels with out_rgba().
+ * The source pointer advances 32 bytes per iteration and is never
+ * realigned at a row boundary: instrides[] is not read, and outstrides[0]
+ * is only used to locate the first output row.
+ * NOTE(review): this looks correct only when rows are stored back to back
+ * and out_rgba() advances its pointer argument -- confirm both.
+ *
+ * Returns srcSliceH.
+ */
+static int altivec_uyvy_rgb32 (SwsContext *c,
+                               unsigned char **in, int *instrides,
+                               int srcSliceY,        int srcSliceH,
+                               unsigned char **oplanes, int *outstrides)
+{
+    const vector unsigned char zero = (vector unsigned char){0};
+    const int blocks_per_row = c->srcW/16;
+    vector unsigned char  packed;
+    vector signed   short luma,cb,cr;
+    vector signed   short r_lo,g_lo,b_lo,r_hi,g_hi,b_hi;
+    vector unsigned char  R,G,B;
+    vector unsigned char *dst;
+    ubyte *src;
+    int row,blk;
+
+    src = in[0];
+    dst = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
+
+    for (row=0; row<srcSliceH; row++) {
+        for (blk=0; blk<blocks_per_row; blk++) {
+            /* first 8 pixels */
+            packed = vec_ld (0, src);
+            cb   = (vector signed short) vec_perm (packed, zero, demux_u);
+            cr   = (vector signed short) vec_perm (packed, zero, demux_v);
+            luma = (vector signed short) vec_perm (packed, zero, demux_y);
+            cvtyuvtoRGB (c, luma,cb,cr,&r_lo,&g_lo,&b_lo);
+
+            /* next 8 pixels */
+            packed = vec_ld (16, src);
+            cb   = (vector signed short) vec_perm (packed, zero, demux_u);
+            cr   = (vector signed short) vec_perm (packed, zero, demux_v);
+            luma = (vector signed short) vec_perm (packed, zero, demux_y);
+            cvtyuvtoRGB (c, luma,cb,cr,&r_hi,&g_hi,&b_hi);
+
+            /* narrow both halves to bytes and store 16 pixels */
+            R  = vec_packclp (r_lo,r_hi);
+            G  = vec_packclp (g_lo,g_hi);
+            B  = vec_packclp (b_lo,b_hi);
+
+            out_rgba (R,G,B,dst);
+
+            src += 32;
+        }
+    }
+    return srcSliceH;
+}
+
+
+
+/* Currently the acceleration routines only support
+   input widths that are a multiple of 16
+   and heights that are a multiple of 2.
+
+   For anything else we just fall back to the C code.
+*/
+/*
+ * Pick an AltiVec slice converter for the source/destination formats in c.
+ *
+ * Returns NULL when the AltiVec flag is not set in c->flags, when the
+ * source width is not a multiple of 16, when a planar-style source has an
+ * odd height, or when the format pair has no AltiVec routine; the caller
+ * is then expected to use another implementation.
+ */
+SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c)
+{
+    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
+        return NULL;
+
+    /*
+      Odd widths are simply refused.  This did not seem to matter much in
+      practice: with abnormal widths MPlayer crashed elsewhere, e.g.
+      mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
+      ends in an X11 bad match.
+    */
+    if ((c->srcW & 0xf) != 0)
+        return NULL;
+
+    /* Packed UYVY has a single supported destination and no height rule. */
+    if (c->srcFormat == PIX_FMT_UYVY422) {
+        if (c->dstFormat != PIX_FMT_BGR32)
+            return NULL;
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
+        return altivec_uyvy_rgb32;
+    }
+
+    switch (c->srcFormat) {
+    case PIX_FMT_YUV410P:
+    case PIX_FMT_YUV420P:
+    /*case IMGFMT_CLPL:        ??? */
+    case PIX_FMT_GRAY8:
+    case PIX_FMT_NV12:
+    case PIX_FMT_NV21:
+        break;
+    default:
+        return NULL;
+    }
+
+    /* These converters work on pairs of rows. */
+    if ((c->srcH & 0x1) != 0)
+        return NULL;
+
+    switch (c->dstFormat) {
+    case PIX_FMT_RGB24:
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
+        return altivec_yuv2_rgb24;
+    case PIX_FMT_BGR24:
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
+        return altivec_yuv2_bgr24;
+    case PIX_FMT_ARGB:
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
+        return altivec_yuv2_argb;
+    case PIX_FMT_ABGR:
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
+        return altivec_yuv2_abgr;
+    case PIX_FMT_RGBA:
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
+        return altivec_yuv2_rgba;
+    case PIX_FMT_BGRA:
+        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
+        return altivec_yuv2_bgra;
+    default:
+        break;
+    }
+
+    return NULL;
+}
+
+/*
+ * Derive the six 16-bit conversion constants from inv_table, brightness,
+ * contrast and saturation, and store each one splatted across a vector in
+ * the context (CY, OY, CRV, CBU, CGU, CGV).  CSHIFT is set to a vector of
+ * 2s.  Only the first six of the eight staging slots are written and read.
+ */
+void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
+{
+    const int cont = contrast>>16;
+    const int sat  = saturation>>16;
+    union {
+        signed short tmp[8] __attribute__ ((aligned(16)));
+        vector signed short vec;
+    } coef;
+
+    coef.tmp[0] =  ((0xffffLL * contrast)>>8)>>9;        /* cy  */
+    coef.tmp[1] =  -256*brightness;                      /* oy  */
+    coef.tmp[2] =  (inv_table[0]>>3) * cont * sat;       /* crv */
+    coef.tmp[3] =  (inv_table[1]>>3) * cont * sat;       /* cbu */
+    coef.tmp[4] = -((inv_table[2]>>1) * cont * sat);     /* cgu */
+    coef.tmp[5] = -((inv_table[3]>>1) * cont * sat);     /* cgv */
+
+    c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
+    c->CY     = vec_splat (coef.vec, 0);
+    c->OY     = vec_splat (coef.vec, 1);
+    c->CRV    = vec_splat (coef.vec, 2);
+    c->CBU    = vec_splat (coef.vec, 3);
+    c->CGU    = vec_splat (coef.vec, 4);
+    c->CGV    = vec_splat (coef.vec, 5);
+#if 0
+    {
+        int i;
+        char *v[6]={"cy","oy","crv","cbu","cgu","cgv"};
+        for (i=0; i<6; i++) {
+            printf("%s %d ", v[i],coef.tmp[i] );
+        }
+        printf("\n");
+    }
+#endif
+}
+
+
+/*
+ * Vertically filter the luma/chroma input rows and emit one packed RGB
+ * output row, 16 pixels per iteration.
+ *
+ * For each block the rows lumSrc[0..lumFilterSize) and
+ * chrSrc[0..chrFilterSize) are accumulated with vec_mradds against the
+ * coefficient vectors at c->vYCoeffsBank / c->vCCoeffsBank (offset by
+ * dstY times the filter size), starting from a rounding value of 8.  The
+ * sums are shifted right by 4, clipped, converted with cvtyuvtoRGB() and
+ * stored in the layout selected by c->dstFormat.  U samples are read from
+ * chrSrc[j][x] and V samples from chrSrc[j][x+2048].
+ *
+ * lumFilter / chrFilter  not read here (the context banks are used)
+ * dest                   start of the output row
+ * dstW                   output width in pixels
+ * dstY                   output row index
+ *
+ * Full 16-pixel blocks are stored straight into dest.  A final partial
+ * block is rendered into an aligned scratch buffer and only the remaining
+ * pixels are copied out, so nothing is written past dstW.  (Previously the
+ * partial-block path could never run because the main loop only ended at
+ * i >= dstW, and its copy length divided by 4 where it should have
+ * multiplied by the pixel size.)  The input rows are still read 16
+ * samples at a time, including for the partial block.
+ *
+ * For an unsupported destination format an error is logged once and the
+ * function returns without writing the current block.
+ */
+void
+altivec_yuv2packedX (SwsContext *c,
+                     int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
+                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
+                     uint8_t *dest, int dstW, int dstY)
+{
+    int i,j;
+    vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
+    vector signed short R0,G0,B0,R1,G1,B1;
+
+    vector unsigned char R,G,B;
+    vector unsigned char *out,*blk;
+
+    vector signed short   RND = vec_splat_s16(1<<3);
+    vector unsigned short SCL = vec_splat_u16(4);
+    /* holds one 16-pixel block: at least 64 bytes */
+    unsigned long scratch[16] __attribute__ ((aligned (16)));
+
+    vector signed short *YCoeffs, *CCoeffs;
+
+    YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
+    CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
+
+    out = (vector unsigned char *)dest;
+
+    for (i=0; i<dstW; i+=16){
+        const int remaining = dstW-i;       /* pixels still to be written */
+        const int partial   = remaining<16; /* last, incomplete block?    */
+        int bpp = 0;                        /* bytes per output pixel     */
+
+        Y0 = RND;
+        Y1 = RND;
+        /* extract 16 coeffs from lumSrc */
+        for (j=0; j<lumFilterSize; j++) {
+            X0 = vec_ld (0,  &lumSrc[j][i]);
+            X1 = vec_ld (16, &lumSrc[j][i]);
+            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
+            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
+        }
+
+        U = RND;
+        V = RND;
+        /* extract 8 coeffs from U,V */
+        for (j=0; j<chrFilterSize; j++) {
+            X  = vec_ld (0, &chrSrc[j][i/2]);
+            U  = vec_mradds (X, CCoeffs[j], U);
+            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
+            V  = vec_mradds (X, CCoeffs[j], V);
+        }
+
+        /* scale and clip signals */
+        Y0 = vec_sra (Y0, SCL);
+        Y1 = vec_sra (Y1, SCL);
+        U  = vec_sra (U,  SCL);
+        V  = vec_sra (V,  SCL);
+
+        Y0 = vec_clip_s16 (Y0);
+        Y1 = vec_clip_s16 (Y1);
+        U  = vec_clip_s16 (U);
+        V  = vec_clip_s16 (V);
+
+        /* now we have
+          Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
+          U= u0 u1 u2 u3 u4 u5 u6 u7      V= v0 v1 v2 v3 v4 v5 v6 v7
+
+          Y0= y0 y1 y2 y3 y4 y5 y6 y7    Y1= y8 y9 y10 y11 y12 y13 y14 y15
+          U0= u0 u0 u1 u1 u2 u2 u3 u3    U1= u4 u4 u5 u5 u6 u6 u7 u7
+          V0= v0 v0 v1 v1 v2 v2 v3 v3    V1= v4 v4 v5 v5 v6 v6 v7 v7
+        */
+
+        U0 = vec_mergeh (U,U);
+        V0 = vec_mergeh (V,V);
+
+        U1 = vec_mergel (U,U);
+        V1 = vec_mergel (V,V);
+
+        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
+        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
+
+        R  = vec_packclp (R0,R1);
+        G  = vec_packclp (G0,G1);
+        B  = vec_packclp (B0,B1);
+
+        /* A partial block must not be stored directly: it would spill
+           past the end of the row.  The out_* macros step the pointer
+           they are given, so work on a copy and commit it afterwards. */
+        blk = partial ? (vector unsigned char *)scratch : out;
+
+        switch(c->dstFormat) {
+            case PIX_FMT_ABGR:  out_abgr  (R,G,B,blk); bpp = 4; break;
+            case PIX_FMT_BGRA:  out_bgra  (R,G,B,blk); bpp = 4; break;
+            case PIX_FMT_RGBA:  out_rgba  (R,G,B,blk); bpp = 4; break;
+            case PIX_FMT_ARGB:  out_argb  (R,G,B,blk); bpp = 4; break;
+            case PIX_FMT_RGB24: out_rgb24 (R,G,B,blk); bpp = 3; break;
+            case PIX_FMT_BGR24: out_bgr24 (R,G,B,blk); bpp = 3; break;
+            default:
+            {
+                /* If this is reached, the caller should have called yuv2packedXinC
+                   instead. */
+                static int printed_error_message;
+                if (!printed_error_message) {
+                    av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
+                           sws_format_name(c->dstFormat));
+                    printed_error_message=1;
+                }
+                return;
+            }
+        }
+
+        if (partial) {
+            /* copy only the pixels that belong to the row */
+            memcpy (dest + i*bpp, scratch, remaining*bpp);
+        } else {
+            out = blk;
+        }
+    }
+
+}
diff --git a/libswscale/yuv2rgb_bfin.c b/libswscale/yuv2rgb_bfin.c
new file mode 100644
index 0000000000..58cc5b6a35
--- /dev/null
+++ b/libswscale/yuv2rgb_bfin.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
+ *
+ * Blackfin video color space converter operations
+ * convert I420 YV12 to RGB in various formats
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+#include "config.h"
+#include <unistd.h>
+#include "rgb2rgb.h"
+#include "swscale.h"
+#include "swscale_internal.h"
+
+/* L1CODE tags a declaration with the l1_text attribute when building
+ * with __FDPIC__ defined and expands to nothing otherwise. */
+#ifdef __FDPIC__
+#define L1CODE __attribute__ ((l1_text))
+#else
+#define L1CODE
+#endif
+
+/*
+ * Line converters implemented outside this file.  Each takes luma and
+ * chroma pointers for one row, an output pointer, a width and a pointer
+ * to a coefficient block; core_yuv420_rgb() passes &c->oy for the latter.
+ */
+void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+                              int w, uint32_t *coeffs) L1CODE;
+
+void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+                              int w, uint32_t *coeffs) L1CODE;
+
+void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+                             int w, uint32_t *coeffs) L1CODE;
+
+/* Common signature of the line converters above. */
+typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+                            int w, uint32_t *coeffs);
+
+
+/*
+ * Fill the 32-bit coefficient fields of the context that the Blackfin
+ * line converters read, starting at c->oy.
+ *
+ * rgb    non-zero: red uses the V coefficient and blue the U coefficient;
+ *        zero: the two chroma roles are exchanged
+ * masks  555 or 565 selects the per-channel bit masks; any other value
+ *        leaves rmask/gmask/bmask untouched
+ */
+static void bfin_prepare_coefficients (SwsContext *c, int rgb, int masks)
+{
+    /* keep everything U8.0 for offset calculation */
+    const int luma_offset = (c->yOffset&0xffff) >> 3;
+
+    /* replicate the byte offsets into all four bytes of a word */
+    c->oc   =         128*0x01010101U;
+    c->oy   = luma_offset*0x01010101U;
+
+    /* copy 64bit vector coeffs down to 32bit vector coeffs */
+    c->cy   = c->yCoeff;
+    c->zero = 0;
+
+    if (!rgb) {
+        c->crv = c->ubCoeff;
+        c->cbu = c->vrCoeff;
+        c->cgu = c->vgCoeff;
+        c->cgv = c->ugCoeff;
+    } else {
+        c->crv = c->vrCoeff;
+        c->cbu = c->ubCoeff;
+        c->cgu = c->ugCoeff;
+        c->cgv = c->vgCoeff;
+    }
+
+    switch (masks) {
+    case 555:
+        c->rmask = 0x001f * 0x00010001U;
+        c->gmask = 0x03e0 * 0x00010001U;
+        c->bmask = 0x7c00 * 0x00010001U;
+        break;
+    case 565:
+        c->rmask = 0x001f * 0x00010001U;
+        c->gmask = 0x07e0 * 0x00010001U;
+        c->bmask = 0xf800 * 0x00010001U;
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * Shared driver for the Blackfin YUV 4:2:0 -> RGB converters.
+ *
+ * Prepares the coefficient block in the context, then walks the slice two
+ * luma rows at a time, calling the line converter lcscf once per luma row
+ * with the same pair of chroma rows.
+ *
+ * lcscf  line converter to use
+ * rgb    1: U is taken from in[1] and V from in[2]; 0: the planes are
+ *        exchanged (expected to be 0 or 1)
+ * masks  forwarded to bfin_prepare_coefficients()
+ *
+ * Each chroma pointer advances by the stride of the plane it points into.
+ * Earlier code always stepped pu by instrides[1] and pv by instrides[2],
+ * which paired each pointer with the other plane's stride when rgb was 0.
+ *
+ * NOTE(review): the width handed to the line converter is instrides[0],
+ * i.e. the luma stride and not the picture width -- confirm this is
+ * intended.  An odd srcSliceH leaves the last row unconverted.
+ *
+ * Returns srcSliceH.
+ */
+static int core_yuv420_rgb (SwsContext *c,
+                            uint8_t **in, int *instrides,
+                            int srcSliceY, int srcSliceH,
+                            uint8_t **oplanes, int *outstrides,
+                            ltransform lcscf, int rgb, int masks)
+{
+    const int u_plane = 1+(1^rgb);
+    const int v_plane = 1+(0^rgb);
+    uint8_t *py,*pu,*pv,*op;
+    int w  = instrides[0];
+    int h2 = srcSliceH>>1;
+    int i;
+
+    bfin_prepare_coefficients (c, rgb, masks);
+
+    py = in[0];
+    pu = in[u_plane];
+    pv = in[v_plane];
+
+    op = oplanes[0] + srcSliceY*outstrides[0];
+
+    for (i=0;i<h2;i++) {
+
+        /* even row */
+        lcscf (py, pu, pv, op, w, &c->oy);
+
+        py += instrides[0];
+        op += outstrides[0];
+
+        /* odd row shares the chroma row */
+        lcscf (py, pu, pv, op, w, &c->oy);
+
+        py += instrides[0];
+        pu += instrides[u_plane];
+        pv += instrides[v_plane];
+        op += outstrides[0];
+    }
+
+    return srcSliceH;
+}
+
+
+/* YUV 4:2:0 -> RGB555 slice converter: 555 line kernel, rgb = 1. */
+static int bfin_yuv420_rgb555 (SwsContext *c,
+                               uint8_t **in, int *instrides,
+                               int srcSliceY, int srcSliceH,
+                               uint8_t **oplanes, int *outstrides)
+{
+    ltransform line_kernel = ff_bfin_yuv2rgb555_line;
+
+    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
+                            oplanes, outstrides, line_kernel, 1, 555);
+}
+
+/* YUV 4:2:0 -> BGR555 slice converter: 555 line kernel, rgb = 0. */
+static int bfin_yuv420_bgr555 (SwsContext *c,
+                               uint8_t **in, int *instrides,
+                               int srcSliceY, int srcSliceH,
+                               uint8_t **oplanes, int *outstrides)
+{
+    ltransform line_kernel = ff_bfin_yuv2rgb555_line;
+
+    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
+                            oplanes, outstrides, line_kernel, 0, 555);
+}
+
+/* YUV 4:2:0 -> RGB24 slice converter: 24-bit line kernel, rgb = 1. */
+static int bfin_yuv420_rgb24 (SwsContext *c,
+                              uint8_t **in, int *instrides,
+                              int srcSliceY, int srcSliceH,
+                              uint8_t **oplanes, int *outstrides)
+{
+    ltransform line_kernel = ff_bfin_yuv2rgb24_line;
+
+    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
+                            oplanes, outstrides, line_kernel, 1, 888);
+}
+
+/* YUV 4:2:0 -> BGR24 slice converter: 24-bit line kernel, rgb = 0. */
+static int bfin_yuv420_bgr24 (SwsContext *c,
+                              uint8_t **in, int *instrides,
+                              int srcSliceY, int srcSliceH,
+                              uint8_t **oplanes, int *outstrides)
+{
+    ltransform line_kernel = ff_bfin_yuv2rgb24_line;
+
+    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
+                            oplanes, outstrides, line_kernel, 0, 888);
+}
+
+/* YUV 4:2:0 -> RGB565 slice converter: 565 line kernel, rgb = 1. */
+static int bfin_yuv420_rgb565 (SwsContext *c,
+                               uint8_t **in, int *instrides,
+                               int srcSliceY, int srcSliceH,
+                               uint8_t **oplanes, int *outstrides)
+{
+    ltransform line_kernel = ff_bfin_yuv2rgb565_line;
+
+    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
+                            oplanes, outstrides, line_kernel, 1, 565);
+}
+
+/* YUV 4:2:0 -> BGR565 slice converter: 565 line kernel, rgb = 0. */
+static int bfin_yuv420_bgr565 (SwsContext *c,
+                               uint8_t **in, int *instrides,
+                               int srcSliceY, int srcSliceH,
+                               uint8_t **oplanes, int *outstrides)
+{
+    ltransform line_kernel = ff_bfin_yuv2rgb565_line;
+
+    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
+                            oplanes, outstrides, line_kernel, 0, 565);
+}
+
+
+/*
+ * Return the Blackfin slice converter matching c->dstFormat, or a null
+ * pointer when the destination format has none.  A message naming the
+ * format is logged at info level only when a converter is returned.
+ */
+SwsFunc ff_bfin_yuv2rgb_get_func_ptr (SwsContext *c)
+{
+    SwsFunc converter = NULL;
+
+    switch(c->dstFormat) {
+    case PIX_FMT_RGB555: converter = bfin_yuv420_rgb555; break;
+    case PIX_FMT_BGR555: converter = bfin_yuv420_bgr555; break;
+    case PIX_FMT_RGB565: converter = bfin_yuv420_rgb565; break;
+    case PIX_FMT_BGR565: converter = bfin_yuv420_bgr565; break;
+    case PIX_FMT_RGB24:  converter = bfin_yuv420_rgb24;  break;
+    case PIX_FMT_BGR24:  converter = bfin_yuv420_bgr24;  break;
+    default:             break;
+    }
+
+    if (!converter)
+        return NULL;
+
+    av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
+           sws_format_name (c->dstFormat));
+
+    return converter;
+}
diff --git a/libswscale/yuv2rgb_mlib.c b/libswscale/yuv2rgb_mlib.c
new file mode 100644
index 0000000000..68247914e7
--- /dev/null
+++ b/libswscale/yuv2rgb_mlib.c
@@ -0,0 +1,85 @@
+/*
+ * software YUV to RGB converter using mediaLib
+ *
+ * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <mlib_types.h>
+#include <mlib_status.h>
+#include <mlib_sys.h>
+#include <mlib_video.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "swscale.h"
+
+/*
+ * Convert a slice of planar YUV to 32-bit output with
+ * mlib_VideoColorYUV2ARGB420().
+ *
+ * For PIX_FMT_YUV422P input the chroma strides are doubled before the
+ * call.  The doubling is done on local copies: the caller's srcStride[]
+ * is left untouched, so passing the same array again does not compound
+ * the doubling.  Both chroma strides must be equal (asserted); only one
+ * is passed on.
+ *
+ * Returns srcSliceH.
+ */
+static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int strideU = srcStride[1];
+    int strideV = srcStride[2];
+
+    if(c->srcFormat == PIX_FMT_YUV422P){
+        strideU *= 2;
+        strideV *= 2;
+    }
+
+    assert(strideU == strideV);
+
+    mlib_VideoColorYUV2ARGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
+                               srcSliceH, dstStride[0], srcStride[0], strideU);
+    return srcSliceH;
+}
+
+/*
+ * Convert a slice of planar YUV to 32-bit output with
+ * mlib_VideoColorYUV2ABGR420().
+ *
+ * For PIX_FMT_YUV422P input the chroma strides are doubled before the
+ * call.  The doubling is done on local copies: the caller's srcStride[]
+ * is left untouched, so passing the same array again does not compound
+ * the doubling.  Both chroma strides must be equal (asserted); only one
+ * is passed on.
+ *
+ * Returns srcSliceH.
+ */
+static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int strideU = srcStride[1];
+    int strideV = srcStride[2];
+
+    if(c->srcFormat == PIX_FMT_YUV422P){
+        strideU *= 2;
+        strideV *= 2;
+    }
+
+    assert(strideU == strideV);
+
+    mlib_VideoColorYUV2ABGR420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
+                               srcSliceH, dstStride[0], srcStride[0], strideU);
+    return srcSliceH;
+}
+
+/*
+ * Convert a slice of planar YUV to 24-bit output with
+ * mlib_VideoColorYUV2RGB420().
+ *
+ * For PIX_FMT_YUV422P input the chroma strides are doubled before the
+ * call.  The doubling is done on local copies: the caller's srcStride[]
+ * is left untouched, so passing the same array again does not compound
+ * the doubling.  Both chroma strides must be equal (asserted); only one
+ * is passed on.
+ *
+ * Returns srcSliceH.
+ */
+static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int strideU = srcStride[1];
+    int strideV = srcStride[2];
+
+    if(c->srcFormat == PIX_FMT_YUV422P){
+        strideU *= 2;
+        strideV *= 2;
+    }
+
+    assert(strideU == strideV);
+
+    mlib_VideoColorYUV2RGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
+                              srcSliceH, dstStride[0], srcStride[0], strideU);
+    return srcSliceH;
+}
+
+
+/*
+ * Select the mediaLib slice converter for c->dstFormat.  Only RGB24,
+ * BGR32 and RGB32 destinations are handled; NULL is returned for all
+ * other formats.
+ */
+SwsFunc sws_yuv2rgb_init_mlib(SwsContext *c)
+{
+    if (c->dstFormat == PIX_FMT_RGB24)
+        return mlib_YUV2RGB420_24;
+    if (c->dstFormat == PIX_FMT_BGR32)
+        return mlib_YUV2ARGB420_32;
+    if (c->dstFormat == PIX_FMT_RGB32)
+        return mlib_YUV2ABGR420_32;
+
+    return NULL;
+}
+
diff --git a/libswscale/yuv2rgb_template.c b/libswscale/yuv2rgb_template.c
new file mode 100644
index 0000000000..f55568b0ab
--- /dev/null
+++ b/libswscale/yuv2rgb_template.c
@@ -0,0 +1,453 @@
+/*
+ * yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology"
+ *
+ * Copyright (C) 2000, Silicon Integrated System Corp
+ *
+ * Author: Olie Lho <ollie@sis.com.tw>
+ *
+ * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
+ * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
+ * context / deglobalize stuff by Michael Niedermayer
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video decoder
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mpeg2dec; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Instruction-name strings spliced into the inline asm below.  They are
+ * undefined first so that they can be defined afresh here for the current
+ * HAVE_AMD3DNOW / HAVE_MMX2 settings.
+ */
+#undef MOVNTQ
+#undef EMMS
+#undef SFENCE
+
+#if HAVE_AMD3DNOW
+/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
+#define EMMS     "femms"
+#else
+#define EMMS     "emms"
+#endif
+
+/* With MMX2 use the movntq store and sfence; without it fall back to a
+ * plain movq.
+ * NOTE(review): the fallback SFENCE is "/nop", not "nop" -- presumably it
+ * relies on the assembler treating '/' as a comment start; confirm. */
+#if HAVE_MMX2
+#define MOVNTQ "movntq"
+#define SFENCE "sfence"
+#else
+#define MOVNTQ "movq"
+#define SFENCE "/nop"
+#endif
+
+/*
+ * Format-independent core of the MMX conversion: turns 8 luma bytes and
+ * 4+4 chroma bytes into 8 blue, 8 red and 8 green bytes.
+ * In:  mm0 = 4 Cb bytes, mm1 = 4 Cr bytes, mm6 = 8 Y bytes, mm4 = 0
+ *      (needed by the first two punpcklbw); asm operand %4 is the base
+ *      address for the *_OFFSET and *_COEFF constants.
+ * Out: mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0.
+ * mm3, mm4, mm5, mm6 and mm7 are overwritten along the way, so mm4 is no
+ * longer zero afterwards.
+ */
+#define YUV2RGB \
+    /* Do the multiply part of the conversion for even and odd pixels,
+       register usage:
+       mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
+       mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
+       mm6 -> Y even, mm7 -> Y odd */\
+    /* convert the chroma part */\
+    "punpcklbw %%mm4, %%mm0;" /* scatter 4 Cb 00 u3 00 u2 00 u1 00 u0 */ \
+    "punpcklbw %%mm4, %%mm1;" /* scatter 4 Cr 00 v3 00 v2 00 v1 00 v0 */ \
+\
+    "psllw $3, %%mm0;" /* Promote precision */ \
+    "psllw $3, %%mm1;" /* Promote precision */ \
+\
+    "psubsw "U_OFFSET"(%4), %%mm0;" /* Cb -= 128 */ \
+    "psubsw "V_OFFSET"(%4), %%mm1;" /* Cr -= 128 */ \
+\
+    "movq %%mm0, %%mm2;" /* Copy 4 Cb 00 u3 00 u2 00 u1 00 u0 */ \
+    "movq %%mm1, %%mm3;" /* Copy 4 Cr 00 v3 00 v2 00 v1 00 v0 */ \
+\
+    "pmulhw "UG_COEFF"(%4), %%mm2;" /* Mul Cb with green coeff -> Cb green */ \
+    "pmulhw "VG_COEFF"(%4), %%mm3;" /* Mul Cr with green coeff -> Cr green */ \
+\
+    "pmulhw "UB_COEFF"(%4), %%mm0;" /* Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 */\
+    "pmulhw "VR_COEFF"(%4), %%mm1;" /* Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 */\
+\
+    "paddsw %%mm3, %%mm2;" /* Cb green + Cr green -> Cgreen */\
+\
+    /* convert the luma part */\
+    "movq %%mm6, %%mm7;" /* Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */\
+    "pand "MANGLE(mmx_00ffw)", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\
+\
+    "psrlw $8, %%mm7;" /* get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 */\
+\
+    "psllw $3, %%mm6;" /* Promote precision */\
+    "psllw $3, %%mm7;" /* Promote precision */\
+\
+    "psubw "Y_OFFSET"(%4), %%mm6;" /* Y -= 16 */\
+    "psubw "Y_OFFSET"(%4), %%mm7;" /* Y -= 16 */\
+\
+    "pmulhw "Y_COEFF"(%4), %%mm6;" /* Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 */\
+    "pmulhw "Y_COEFF"(%4), %%mm7;" /* Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 */\
+\
+    /* Do the addition part of the conversion for even and odd pixels,
+       register usage:
+       mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
+       mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
+       mm6 -> Y even, mm7 -> Y odd */\
+    "movq %%mm0, %%mm3;" /* Copy Cblue */\
+    "movq %%mm1, %%mm4;" /* Copy Cred */\
+    "movq %%mm2, %%mm5;" /* Copy Cgreen */\
+\
+    "paddsw %%mm6, %%mm0;" /* Y even + Cblue 00 B6 00 B4 00 B2 00 B0 */\
+    "paddsw %%mm7, %%mm3;" /* Y odd + Cblue 00 B7 00 B5 00 B3 00 B1 */\
+\
+    "paddsw %%mm6, %%mm1;" /* Y even + Cred 00 R6 00 R4 00 R2 00 R0 */\
+    "paddsw %%mm7, %%mm4;" /* Y odd + Cred 00 R7 00 R5 00 R3 00 R1 */\
+\
+    "paddsw %%mm6, %%mm2;" /* Y even + Cgreen 00 G6 00 G4 00 G2 00 G0 */\
+    "paddsw %%mm7, %%mm5;" /* Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1 */\
+\
+    /* Limit RGB even to 0..255 */\
+    "packuswb %%mm0, %%mm0;" /* B6 B4 B2 B0  B6 B4 B2 B0 */\
+    "packuswb %%mm1, %%mm1;" /* R6 R4 R2 R0  R6 R4 R2 R0 */\
+    "packuswb %%mm2, %%mm2;" /* G6 G4 G2 G0  G6 G4 G2 G0 */\
+\
+    /* Limit RGB odd to 0..255 */\
+    "packuswb %%mm3, %%mm3;" /* B7 B5 B3 B1  B7 B5 B3 B1 */\
+    "packuswb %%mm4, %%mm4;" /* R7 R5 R3 R1  R7 R5 R3 R1 */\
+    "packuswb %%mm5, %%mm5;" /* G7 G5 G3 G1  G7 G5 G3 G1 */\
+\
+    /* Interleave RGB even and odd */\
+    "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\
+    "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\
+    "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
+
+/*
+ * For PIX_FMT_YUV422P input, double both chroma strides so that the
+ * (y>>1)*srcStride[] row addressing in YUV2RGB_LOOP lands on every second
+ * chroma row.  Expects `c` and `srcStride` in scope and modifies
+ * srcStride[1] and srcStride[2] in place.
+ */
+#define YUV422_UNSHIFT                   \
+    if(c->srcFormat == PIX_FMT_YUV422P){ \
+        srcStride[1] *= 2;               \
+        srcStride[2] *= 2;               \
+    }                                    \
+
+/*
+ * Opens the per-row loop of a converter; the `for` body is left open and
+ * is closed by YUV2RGB_ENDLOOP.  `depth` is the number of bytes per
+ * output pixel.  Expects c, src, srcStride, dst, dstStride, srcSliceY,
+ * srcSliceH, y and h_size in scope.
+ *
+ * h_size is dstW rounded up to a multiple of 8, reduced by 8 when that
+ * many pixels would not fit in |dstStride[0]| bytes.  mm4 is zeroed once
+ * before the loop.  For each row the macro sets up the output pointer
+ * `image`, the plane pointers py/pu/pv (chroma row = y>>1) and `index`,
+ * which starts at -h_size/2.
+ */
+#define YUV2RGB_LOOP(depth)                                   \
+    h_size= (c->dstW+7)&~7;                                   \
+    if(h_size*depth > FFABS(dstStride[0])) h_size-=8;         \
+\
+    __asm__ volatile ("pxor %mm4, %mm4;" /* zero mm4 */ );    \
+    for (y= 0; y<srcSliceH; y++ ) {                           \
+        uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0]; \
+        uint8_t *py = src[0] + y*srcStride[0];                \
+        uint8_t *pu = src[1] + (y>>1)*srcStride[1];           \
+        uint8_t *pv = src[2] + (y>>1)*srcStride[2];           \
+        long index= -h_size/2;                                \
+
+/*
+ * Opens the inline asm statement for one row (left open; the operand
+ * lists are supplied by YUV2RGB_ENDLOOP), preloads the first 4 Cb, 4 Cr
+ * and 8 Y bytes into mm0, mm1 and mm6, and places the loop label "1:".
+ * Operand %0 is the running index, %2/%3 are the chroma bases and %5 the
+ * luma base, which is indexed with twice the chroma index.
+ */
+#define YUV2RGB_INIT                                                       \
+        /* This MMX assembly code deals with a SINGLE scan line at a time, \
+         * it converts 8 pixels in each iteration. */                      \
+        __asm__ volatile (                                                 \
+        /* load data for start of next scan line */                        \
+        "movd    (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \
+        "movd    (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \
+        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
+        /*                                                                 \
+        ".balign 16     \n\t"                                              \
+        */                                                                 \
+        "1:             \n\t"                                              \
+        /* No speed difference on my p3@500 with prefetch,                 \
+         * if it is faster for anyone with -benchmark then tell me.        \
+        PREFETCH" 64(%0) \n\t"                                             \
+        PREFETCH" 64(%1) \n\t"                                             \
+        PREFETCH" 64(%2) \n\t"                                             \
+        */                                                                 \
+
+/*
+ * Closes what YUV2RGB_INIT and YUV2RGB_LOOP opened.  Inside the asm it
+ * advances the output pointer (%1) by depth*8 bytes, i.e. 8 pixels, adds
+ * 4 to the index (%0) and jumps back to label 1 while the index is still
+ * negative.  It then supplies the operands:
+ *   %0 index (in/out)   %1 image (in/out)
+ *   %2 pu - index       %3 pv - index
+ *   %4 &c->redDither    %5 py - 2*index
+ * The base pointers are biased by the initial negative index so that
+ * base+index starts at the first sample of the row.  After the row loop
+ * the MMX state is cleared with EMMS and the enclosing function returns
+ * srcSliceH.
+ */
+#define YUV2RGB_ENDLOOP(depth) \
+        "add $"AV_STRINGIFY(depth*8)", %1    \n\t" \
+        "add                       $4, %0    \n\t" \
+        " js                       1b        \n\t" \
+\
+        : "+r" (index), "+r" (image) \
+        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) \
+        ); \
+    } \
+    __asm__ volatile (EMMS); \
+    return srcSliceH; \
+
+/*
+ * Convert a slice of planar YUV to 16 bits per pixel (5 bits red, 6 bits
+ * green, 5 bits blue, as laid out in the asm comments below), 8 pixels
+ * per asm iteration.  PIX_FMT_YUV422P input is handled by doubling the
+ * chroma strides (YUV422_UNSHIFT).
+ *
+ * The per-row dither values are stored in the context from ff_dither8 /
+ * ff_dither4, alternating with the row parity; they are only added to the
+ * pixels when DITHER1XBPP is defined.
+ *
+ * Returns srcSliceH (the return statement is part of YUV2RGB_ENDLOOP).
+ */
+static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int y, h_size;
+
+    YUV422_UNSHIFT
+    YUV2RGB_LOOP(2)
+
+        c->blueDither= ff_dither8[y&1];
+        c->greenDither= ff_dither4[y&1];
+        c->redDither= ff_dither8[(y+1)&1];
+
+        YUV2RGB_INIT
+        YUV2RGB
+
+#ifdef DITHER1XBPP
+        "paddusb "BLUE_DITHER"(%4), %%mm0;"
+        "paddusb "GREEN_DITHER"(%4), %%mm2;"
+        "paddusb "RED_DITHER"(%4), %%mm1;"
+#endif
+        /* mask unneeded bits off */
+        "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
+        "pand "MANGLE(mmx_grnmask)", %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
+        "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
+
+        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
+        /* YUV2RGB used mm4 as scratch, so it has to be cleared again */
+        "pxor %%mm4, %%mm4;" /* zero mm4 */
+
+        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
+        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
+
+        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
+        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
+        "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
+
+        "psllw  $3, %%mm2;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */
+        "por %%mm2, %%mm0;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */
+
+        /* loads for the next iteration are interleaved with the stores */
+        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+        MOVNTQ "      %%mm0, (%1);" /* store pixel 0-3 */
+
+        /* convert RGB24 plane to RGB16 pack for pixel 4-7 */
+        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
+        "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
+
+        "psllw        $3, %%mm7;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */
+        "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+
+        "por       %%mm7, %%mm5;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */
+        "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+
+        MOVNTQ "   %%mm5, 8 (%1);" /* store pixel 4-7 */
+
+    YUV2RGB_ENDLOOP(2)
+}
+
+static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int y, h_size;
+    /* YUV 4:2:0 -> 15 bpp RGB (0 r5 g5 b5), 8 pixels per asm iteration; returns srcSliceH. */
+    YUV422_UNSHIFT
+    YUV2RGB_LOOP(2)
+        /* Dither rows alternate with the parity of y; the asm adds them only if DITHER1XBPP is defined. */
+        c->blueDither= ff_dither8[y&1];
+        c->greenDither= ff_dither8[y&1];
+        c->redDither= ff_dither8[(y+1)&1];
+
+        YUV2RGB_INIT
+        YUV2RGB
+        /* Here mm0 = B, mm2 = G, mm1 = R: one byte per pixel for 8 pixels. */
+#ifdef DITHER1XBPP
+        "paddusb "BLUE_DITHER"(%4), %%mm0  \n\t"
+        "paddusb "GREEN_DITHER"(%4), %%mm2  \n\t"
+        "paddusb "RED_DITHER"(%4), %%mm1  \n\t"
+#endif
+
+        /* mask unneeded bits off (all three channels keep their top 5 bits) */
+        "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
+        "pand "MANGLE(mmx_redmask)", %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
+        "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
+
+        "psrlw   $3, %%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
+        "psrlw   $1, %%mm1;" /* 0_r7r6r5  r4r3_0_0 0_r7r6r5 r4r3_0_0 */
+        "pxor %%mm4, %%mm4;" /* zero mm4 */
+
+        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
+        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
+
+        /* convert RGB24 plane to RGB15 pack for pixel 0-3 */
+        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
+        "punpcklbw %%mm1, %%mm0;" /* 0_r7r6r5 r4r3_0_0 0_0_0_b7 b6b5b4b3 */
+
+        "psllw  $2, %%mm2;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */
+        "por %%mm2, %%mm0;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */
+
+        "movq 8 (%5, %0, 2), %%mm6;" /* Load the next 8 Y: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+        MOVNTQ "      %%mm0, (%1);"  /* store pixel 0-3 */
+
+        /* convert RGB24 plane to RGB15 pack for pixel 4-7 */
+        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
+        "punpckhbw %%mm1, %%mm5;" /* 0_r7r6r5 r4r3_0_0 0_0_0_b7 b6b5b4b3 */
+
+        "psllw        $2, %%mm7;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */
+        "movd 4 (%2, %0), %%mm0;" /* Load the next 4 Cb: 00 00 00 00 u3 u2 u1 u0 */
+
+        "por       %%mm7, %%mm5;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */
+        "movd 4 (%3, %0), %%mm1;" /* Load the next 4 Cr: 00 00 00 00 v3 v2 v1 v0 */
+
+        MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
+
+    YUV2RGB_ENDLOOP(2)
+}
+
+static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int y, h_size;
+
+    YUV422_UNSHIFT
+    YUV2RGB_LOOP(3)
+        /* YUV 4:2:0 -> packed 24 bpp: 8 pixels = 24 bytes = three MOVNTQ stores per asm iteration. */
+        YUV2RGB_INIT
+        YUV2RGB
+        /* mm0=B, %%mm2=G, %%mm1=R */
+#if HAVE_MMX2
+        "movq "MANGLE(ff_M24A)", %%mm4     \n\t"
+        "movq "MANGLE(ff_M24C)", %%mm7     \n\t"
+        "pshufw $0x50, %%mm0, %%mm5     \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */
+        "pshufw $0x50, %%mm2, %%mm3     \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */
+        "pshufw $0x00, %%mm1, %%mm6     \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */
+
+        "pand   %%mm4, %%mm5            \n\t" /*    B2        B1       B0 */
+        "pand   %%mm4, %%mm3            \n\t" /*    G2        G1       G0 */
+        "pand   %%mm7, %%mm6            \n\t" /*       R1        R0       */
+
+        "psllq     $8, %%mm3            \n\t" /* G2        G1       G0    */
+        "por    %%mm5, %%mm6            \n\t"
+        "por    %%mm3, %%mm6            \n\t"
+        MOVNTQ" %%mm6, (%1)             \n\t"
+
+        "psrlq     $8, %%mm2            \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */
+        "pshufw $0xA5, %%mm0, %%mm5     \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */
+        "pshufw $0x55, %%mm2, %%mm3     \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */
+        "pshufw $0xA5, %%mm1, %%mm6     \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */
+
+        "pand "MANGLE(ff_M24B)", %%mm5     \n\t" /* B5       B4        B3    */
+        "pand          %%mm7, %%mm3     \n\t" /*       G4        G3       */
+        "pand          %%mm4, %%mm6     \n\t" /*    R4        R3       R2 */
+
+        "por    %%mm5, %%mm3            \n\t" /* B5    G4 B4     G3 B3    */
+        "por    %%mm3, %%mm6            \n\t"
+        MOVNTQ" %%mm6, 8(%1)            \n\t"
+
+        "pshufw $0xFF, %%mm0, %%mm5     \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */
+        "pshufw $0xFA, %%mm2, %%mm3     \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */
+        "pshufw $0xFA, %%mm1, %%mm6     \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */
+        "movd 4 (%2, %0), %%mm0;" /* Load the next 4 Cb: 00 00 00 00 u3 u2 u1 u0 */
+
+        "pand          %%mm7, %%mm5     \n\t" /*       B7        B6       */
+        "pand          %%mm4, %%mm3     \n\t" /*    G7        G6       G5 */
+        "pand "MANGLE(ff_M24B)", %%mm6     \n\t" /* R7       R6        R5    */
+        "movd 4 (%3, %0), %%mm1;" /* Load the next 4 Cr: 00 00 00 00 v3 v2 v1 v0 */
+\
+        "por          %%mm5, %%mm3      \n\t"
+        "por          %%mm3, %%mm6      \n\t"
+        MOVNTQ"       %%mm6, 16(%1)     \n\t"
+        "movq 8 (%5, %0, 2), %%mm6;" /* Load the next 8 Y: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+        "pxor         %%mm4, %%mm4      \n\t"
+
+#else
+
+        "pxor      %%mm4, %%mm4     \n\t"
+        "movq      %%mm0, %%mm5     \n\t" /* B */
+        "movq      %%mm1, %%mm6     \n\t" /* R */
+        "punpcklbw %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */
+        "punpcklbw %%mm4, %%mm1     \n\t" /* 0R0R0R0R 0 */
+        "punpckhbw %%mm2, %%mm5     \n\t" /* GBGBGBGB 2 */
+        "punpckhbw %%mm4, %%mm6     \n\t" /* 0R0R0R0R 2 */
+        "movq      %%mm0, %%mm7     \n\t" /* GBGBGBGB 0 */
+        "movq      %%mm5, %%mm3     \n\t" /* GBGBGBGB 2 */
+        "punpcklwd %%mm1, %%mm7     \n\t" /* 0RGB0RGB 0 */
+        "punpckhwd %%mm1, %%mm0     \n\t" /* 0RGB0RGB 1 */
+        "punpcklwd %%mm6, %%mm5     \n\t" /* 0RGB0RGB 2 */
+        "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */
+
+        "movq      %%mm7, %%mm2     \n\t" /* 0RGB0RGB 0 */
+        "movq      %%mm0, %%mm6     \n\t" /* 0RGB0RGB 1 */
+        "movq      %%mm5, %%mm1     \n\t" /* 0RGB0RGB 2 */
+        "movq      %%mm3, %%mm4     \n\t" /* 0RGB0RGB 3 */
+
+        "psllq       $40, %%mm7     \n\t" /* RGB00000 0 */
+        "psllq       $40, %%mm0     \n\t" /* RGB00000 1 */
+        "psllq       $40, %%mm5     \n\t" /* RGB00000 2 */
+        "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */
+
+        "punpckhdq %%mm2, %%mm7     \n\t" /* 0RGBRGB0 0 */
+        "punpckhdq %%mm6, %%mm0     \n\t" /* 0RGBRGB0 1 */
+        "punpckhdq %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */
+        "punpckhdq %%mm4, %%mm3     \n\t" /* 0RGBRGB0 3 */
+
+        "psrlq        $8, %%mm7     \n\t" /* 00RGBRGB 0 */
+        "movq      %%mm0, %%mm6     \n\t" /* 0RGBRGB0 1 */
+        "psllq       $40, %%mm0     \n\t" /* GB000000 1 */
+        "por       %%mm0, %%mm7     \n\t" /* GBRGBRGB 0 */
+        MOVNTQ"    %%mm7, (%1)      \n\t"
+
+        "movd 4 (%2, %0), %%mm0;" /* Load the next 4 Cb: 00 00 00 00 u3 u2 u1 u0 */
+
+        "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */
+        "movq      %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */
+        "psllq       $24, %%mm5     \n\t" /* BRGB0000 2 */
+        "por       %%mm5, %%mm6     \n\t" /* BRGBRGBR 1 */
+        MOVNTQ"    %%mm6, 8(%1)     \n\t"
+
+        "movq 8 (%5, %0, 2), %%mm6;" /* Load the next 8 Y: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+
+        "psrlq       $40, %%mm1     \n\t" /* 000000RG 2 */
+        "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */
+        "por       %%mm3, %%mm1     \n\t" /* RGBRGBRG 2 */
+        MOVNTQ"    %%mm1, 16(%1)    \n\t"
+
+        "movd 4 (%3, %0), %%mm1;" /* Load the next 4 Cr: 00 00 00 00 v3 v2 v1 v0 */
+        "pxor      %%mm4, %%mm4     \n\t"
+#endif
+
+    YUV2RGB_ENDLOOP(3)
+}
+
+#define RGB_PLANAR2PACKED32                                             \
+    /* convert RGB plane to RGB packed format,                          \
+       mm0 ->  B, mm1 -> R, mm2 -> G, mm3 -> A,                         \
+       mm4 -> GB, mm5 -> AR pixel 4-7,                                  \
+       mm6 -> GB, mm7 -> AR pixel 0-3 */                                \
+    "movq      %%mm0, %%mm6;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
+    "movq      %%mm1, %%mm7;"   /* R7 R6 R5 R4 R3 R2 R1 R0 */           \
+\
+    "movq      %%mm0, %%mm4;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
+    "movq      %%mm1, %%mm5;"   /* R7 R6 R5 R4 R3 R2 R1 R0 */           \
+\
+    "punpcklbw %%mm2, %%mm6;"   /* G3 B3 G2 B2 G1 B1 G0 B0 */           \
+    "punpcklbw %%mm3, %%mm7;"   /* A3 R3 A2 R2 A1 R1 A0 R0 */           \
+\
+    "punpcklwd %%mm7, %%mm6;"   /* A1 R1 G1 B1 A0 R0 G0 B0 */           \
+    MOVNTQ "   %%mm6, (%1);"    /* Store ARGB1 ARGB0 */                 \
+\
+    "movq      %%mm0, %%mm6;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
+    "punpcklbw %%mm2, %%mm6;"   /* G3 B3 G2 B2 G1 B1 G0 B0 */           \
+\
+    "punpckhwd %%mm7, %%mm6;"   /* A3 R3 G3 B3 A2 R2 G2 B2 */           \
+    MOVNTQ "   %%mm6, 8 (%1);"  /* Store ARGB3 ARGB2 */                 \
+\
+    "punpckhbw %%mm2, %%mm4;"   /* G7 B7 G6 B6 G5 B5 G4 B4 */           \
+    "punpckhbw %%mm3, %%mm5;"   /* A7 R7 A6 R6 A5 R5 A4 R4 */           \
+\
+    "punpcklwd %%mm5, %%mm4;"   /* A5 R5 G5 B5 A4 R4 G4 B4 */           \
+    MOVNTQ "   %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */                 \
+\
+    "movq      %%mm0, %%mm4;"   /* B7 B6 B5 B4 B3 B2 B1 B0 */           \
+    "punpckhbw %%mm2, %%mm4;"   /* G7 B7 G6 B6 G5 B5 G4 B4 */           \
+\
+    "punpckhwd %%mm5, %%mm4;"   /* A7 R7 G7 B7 A6 R6 G6 B6 */           \
+    MOVNTQ "   %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */                 \
+\
+    "movd 4 (%2, %0), %%mm0;"   /* Load next 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \
+    "movd 4 (%3, %0), %%mm1;"   /* Load next 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \
+\
+    "pxor         %%mm4, %%mm4;" /* zero mm4 */                         \
+    "movq 8 (%5, %0, 2), %%mm6;" /* Load next 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
+
+static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                                       int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int y, h_size;
+    /* YUV 4:2:0 -> packed 32 bpp: 8 pixels = 32 bytes = four MOVNTQ stores per asm iteration; returns srcSliceH. */
+    YUV422_UNSHIFT
+    YUV2RGB_LOOP(4)
+
+        YUV2RGB_INIT
+        YUV2RGB
+        "pcmpeqd   %%mm3, %%mm3;"   /* set mm3 to all ones: the A bytes interleaved by RGB_PLANAR2PACKED32 */
+        RGB_PLANAR2PACKED32
+
+    YUV2RGB_ENDLOOP(4)
+}
diff --git a/libswscale/yuv2rgb_vis.c b/libswscale/yuv2rgb_vis.c
new file mode 100644
index 0000000000..2e2737aa9f
--- /dev/null
+++ b/libswscale/yuv2rgb_vis.c
@@ -0,0 +1,209 @@
+/*
+ * VIS optimized software YUV to RGB converter
+ * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include "swscale.h"
+#include "swscale_internal.h"
+
+#define YUV2RGB_INIT /* %5 = c->sparc_coeffs in both users; indices below assume 8-byte entries */ \
+    "wr %%g0, 0x10, %%gsr \n\t" /* GSR = 0x10; presumably the fpack16 scale factor - confirm in the VIS manual */ \
+    "ldd [%5], %%f32      \n\t" /* sparc_coeffs[0]: yOffset*yCoeff term  */ \
+    "ldd [%5+8], %%f34    \n\t" /* sparc_coeffs[1]: uOffset*ubCoeff term */ \
+    "ldd [%5+16], %%f36   \n\t" /* sparc_coeffs[2]: uOffset*ugCoeff term */ \
+    "ldd [%5+24], %%f38   \n\t" /* sparc_coeffs[3]: vOffset*vgCoeff term */ \
+    "ldd [%5+32], %%f40   \n\t" /* sparc_coeffs[4]: vOffset*vrCoeff term */ \
+    "ldd [%5+40], %%f42   \n\t" /* sparc_coeffs[5]: yCoeff  */ \
+    "ldd [%5+48], %%f44   \n\t" /* sparc_coeffs[6]: vgCoeff */ \
+    "ldd [%5+56], %%f46   \n\t" /* sparc_coeffs[7]: vrCoeff */ \
+    "ldd [%5+64], %%f48   \n\t" /* sparc_coeffs[8]: ubCoeff */ \
+    "ldd [%5+72], %%f50   \n\t" /* sparc_coeffs[9]: ugCoeff */
+
+#define YUV2RGB_KERNEL \
+    /* ^^^^ f0=Y f3=u f5=v for pixels 0-3; f16/f19/f21 are the same inputs for pixels 4-7 */ \
+    "fmul8x16 %%f3, %%f48, %%f6   \n\t" /* u * ubCoeff */ \
+    "fmul8x16 %%f19, %%f48, %%f22 \n\t" \
+    "fmul8x16 %%f5, %%f44, %%f8   \n\t" /* v * vgCoeff */ \
+    "fmul8x16 %%f21, %%f44, %%f24 \n\t" \
+    "fmul8x16 %%f0, %%f42, %%f0   \n\t" /* Y * yCoeff */ \
+    "fmul8x16 %%f16, %%f42, %%f16 \n\t" \
+    "fmul8x16 %%f3, %%f50, %%f2   \n\t" /* u * ugCoeff */ \
+    "fmul8x16 %%f19, %%f50, %%f18 \n\t" \
+    "fmul8x16 %%f5, %%f46, %%f4   \n\t" /* v * vrCoeff */ \
+    "fmul8x16 %%f21, %%f46, %%f20 \n\t" \
+    /* subtract the precomputed offset terms; the number is the sparc_coeffs index */ \
+    "fpsub16 %%f6, %%f34, %%f6   \n\t" /* 1 */ \
+    "fpsub16 %%f22, %%f34, %%f22 \n\t" /* 1 */ \
+    "fpsub16 %%f8, %%f38, %%f8   \n\t" /* 3 */ \
+    "fpsub16 %%f24, %%f38, %%f24 \n\t" /* 3 */ \
+    "fpsub16 %%f0, %%f32, %%f0   \n\t" /* 0 */ \
+    "fpsub16 %%f16, %%f32, %%f16 \n\t" /* 0 */ \
+    "fpsub16 %%f2, %%f36, %%f2   \n\t" /* 2 */ \
+    "fpsub16 %%f18, %%f36, %%f18 \n\t" /* 2 */ \
+    "fpsub16 %%f4, %%f40, %%f4   \n\t" /* 4 */ \
+    "fpsub16 %%f20, %%f40, %%f20 \n\t" /* 4 */ \
+    /* add the luma term to each chroma term */ \
+    "fpadd16 %%f0, %%f8, %%f8    \n\t" /* Gt = Y term + v*vg term */ \
+    "fpadd16 %%f16, %%f24, %%f24 \n\t" /* Gt */ \
+    "fpadd16 %%f0, %%f4, %%f4    \n\t" /* R = Y term + v*vr term */ \
+    "fpadd16 %%f16, %%f20, %%f20 \n\t" /* R */ \
+    "fpadd16 %%f0, %%f6, %%f6    \n\t" /* B = Y term + u*ub term */ \
+    "fpadd16 %%f16, %%f22, %%f22 \n\t" /* B */ \
+    "fpadd16 %%f8, %%f2, %%f2    \n\t" /* G = Gt + u*ug term */ \
+    "fpadd16 %%f24, %%f18, %%f18 \n\t" /* G */ \
+    /* pack each group of four 16-bit results down to four bytes, in place */ \
+    "fpack16 %%f4, %%f4    \n\t" \
+    "fpack16 %%f20, %%f20  \n\t" \
+    "fpack16 %%f6, %%f6    \n\t" \
+    "fpack16 %%f22, %%f22  \n\t" \
+    "fpack16 %%f2, %%f2    \n\t" \
+    "fpack16 %%f18, %%f18  \n\t"
+
+
+
+// FIXME: must be changed to set alpha to 255 instead of 0
+static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[]){
+  int y, out1, out2, out3, out4, out5, out6;
+  /* One asm statement per line, 8 pixels per loop pass; a chroma row serves two luma lines. Returns srcSliceH. */
+  for(y=0;y < srcSliceH;++y) {
+      __asm__ volatile (
+          YUV2RGB_INIT
+          "wr %%g0, 0xd2, %%asi        \n\t" /* ASI_FL16_P */
+          "1:                          \n\t"
+          "ldda [%1] %%asi, %%f2       \n\t" /* u: two loads at %1 and %1+2 (4 bytes per pass) */
+          "ldda [%1+2] %%asi, %%f18    \n\t"
+          "ldda [%2] %%asi, %%f4       \n\t" /* v: two loads at %2 and %2+2 (4 bytes per pass) */
+          "ldda [%2+2] %%asi, %%f20    \n\t"
+          "ld [%0], %%f0               \n\t" /* Y for pixels 0-3 */
+          "ld [%0+4], %%f16            \n\t" /* Y for pixels 4-7 */
+          "fpmerge %%f3, %%f3, %%f2    \n\t" /* merge a register with itself: every chroma byte is doubled */
+          "fpmerge %%f19, %%f19, %%f18 \n\t"
+          "fpmerge %%f5, %%f5, %%f4    \n\t"
+          "fpmerge %%f21, %%f21, %%f20 \n\t"
+          YUV2RGB_KERNEL
+          "fzero %%f0                  \n\t" /* zero bytes merged in next to g (the alpha of the FIXME) */
+          "fpmerge %%f4, %%f6, %%f8    \n\t"  // r,b,t1
+          "fpmerge %%f20, %%f22, %%f24 \n\t"  // r,b,t1
+          "fpmerge %%f0, %%f2, %%f10   \n\t"  // 0,g,t2
+          "fpmerge %%f0, %%f18, %%f26  \n\t"  // 0,g,t2
+          "fpmerge %%f10, %%f8, %%f4   \n\t"  // t2,t1,msb
+          "fpmerge %%f26, %%f24, %%f20 \n\t"  // t2,t1,msb
+          "fpmerge %%f11, %%f9, %%f6   \n\t"  // t2,t1,lsb
+          "fpmerge %%f27, %%f25, %%f22 \n\t"  // t2,t1,lsb
+          "std %%f4, [%3]              \n\t"
+          "std %%f20, [%3+16]          \n\t"
+          "std %%f6, [%3+8]            \n\t"
+          "std %%f22, [%3+24]          \n\t"
+          /* advance: 8 Y, 4 u and 4 v bytes consumed, 32 output bytes written; %4 counts the pixels left */
+          "add %0, 8, %0   \n\t"
+          "add %1, 4, %1   \n\t"
+          "add %2, 4, %2   \n\t"
+          "subcc %4, 8, %4 \n\t"
+          "bne 1b          \n\t"
+          "add %3, 32, %3  \n\t" //delay slot
+          : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) /* tied to inputs 0-5 below */
+          : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]), /* %0 = Y row, %1 = U row */
+            "2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), /* %2 = V row, %3 = output row */
+            "4" (c->dstW), /* %4 = pixel counter */
+            "5" (c->sparc_coeffs) /* %5 = table read by YUV2RGB_INIT */
+      );
+  }
+
+  return srcSliceH;
+}
+
+// FIXME: must be changed to set alpha to 255 instead of 0
+static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[]){
+  int y, out1, out2, out3, out4, out5, out6;
+  /* One asm statement per line, 8 pixels per loop pass; every luma line has its own chroma row. Returns srcSliceH. */
+  for(y=0;y < srcSliceH;++y) {
+      __asm__ volatile (
+          YUV2RGB_INIT
+          "wr %%g0, 0xd2, %%asi        \n\t" /* ASI_FL16_P */
+          "1:                          \n\t"
+          "ldda [%1] %%asi, %%f2       \n\t" /* u: two loads at %1 and %1+2 (4 bytes per pass) */
+          "ldda [%1+2] %%asi, %%f18    \n\t"
+          "ldda [%2] %%asi, %%f4       \n\t" /* v: two loads at %2 and %2+2 (4 bytes per pass) */
+          "ldda [%2+2] %%asi, %%f20    \n\t"
+          "ld [%0], %%f0               \n\t" /* Y for pixels 0-3 */
+          "ld [%0+4], %%f16            \n\t" /* Y for pixels 4-7 */
+          "fpmerge %%f3, %%f3, %%f2    \n\t" /* merge a register with itself: every chroma byte is doubled */
+          "fpmerge %%f19, %%f19, %%f18 \n\t"
+          "fpmerge %%f5, %%f5, %%f4    \n\t"
+          "fpmerge %%f21, %%f21, %%f20 \n\t"
+          YUV2RGB_KERNEL
+          "fzero %%f0 \n\t" /* zero bytes merged in next to g (the alpha of the FIXME) */
+          "fpmerge %%f4, %%f6, %%f8    \n\t"  // r,b,t1
+          "fpmerge %%f20, %%f22, %%f24 \n\t"  // r,b,t1
+          "fpmerge %%f0, %%f2, %%f10   \n\t"  // 0,g,t2
+          "fpmerge %%f0, %%f18, %%f26  \n\t"  // 0,g,t2
+          "fpmerge %%f10, %%f8, %%f4   \n\t"  // t2,t1,msb
+          "fpmerge %%f26, %%f24, %%f20 \n\t"  // t2,t1,msb
+          "fpmerge %%f11, %%f9, %%f6   \n\t"  // t2,t1,lsb
+          "fpmerge %%f27, %%f25, %%f22 \n\t"  // t2,t1,lsb
+          "std %%f4, [%3]              \n\t"
+          "std %%f20, [%3+16]          \n\t"
+          "std %%f6, [%3+8]            \n\t"
+          "std %%f22, [%3+24]          \n\t"
+          /* advance: 8 Y, 4 u and 4 v bytes consumed, 32 output bytes written; %4 counts the pixels left */
+          "add %0, 8, %0   \n\t"
+          "add %1, 4, %1   \n\t"
+          "add %2, 4, %2   \n\t"
+          "subcc %4, 8, %4 \n\t"
+          "bne 1b          \n\t"
+          "add %3, 32, %3  \n\t" //delay slot
+          : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) /* tied to inputs 0-5 below */
+          : "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]), /* %0 = Y row, %1 = U row */
+            "2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), /* %2 = V row, %3 = output row */
+            "4" (c->dstW), /* %4 = pixel counter */
+            "5" (c->sparc_coeffs) /* %5 = table read by YUV2RGB_INIT */
+      );
+  }
+
+  return srcSliceH;
+}
+
+SwsFunc sws_yuv2rgb_init_vis(SwsContext *c) { /* fills c->sparc_coeffs, then returns a VIS converter or NULL */
+    c->sparc_coeffs[5]=c->yCoeff; /* entries 5-9: the coefficients as stored in the context */
+    c->sparc_coeffs[6]=c->vgCoeff;
+    c->sparc_coeffs[7]=c->vrCoeff;
+    c->sparc_coeffs[8]=c->ubCoeff;
+    c->sparc_coeffs[9]=c->ugCoeff;
+    /* Entries 0-4: (offset*coeff)>>11 cut to 16 bits, then copied into all four 16-bit lanes by the multiply. */
+    c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL;
+    c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
+    c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
+    c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
+    c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
+    /* Only RGB32 output whose width is a multiple of 8 is accelerated (the asm loops step 8 pixels). */
+    if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) {
+        av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n");
+        return vis_422P_ARGB32;
+    }
+    else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) {
+        av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n");
+        return vis_420P_ARGB32;
+    }
+    return NULL; /* no VIS path for this format/width combination */
+}

From 030896c76a74dcfeed0b6eb88d5389fe9238aae3 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Apr 2009 10:11:31 +0000
Subject: [PATCH 010/315] Merge split of README <-> LICENSE files along with
 the clarifications.

Originally committed as revision 18489 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 LICENSE | 37 +++++++++++++++++++++++++++++++++++++
 README  | 30 +-----------------------------
 2 files changed, 38 insertions(+), 29 deletions(-)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000..8c74356708
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,37 @@
+Most files in FFmpeg are under the GNU Lesser General Public License version 2.1
+or later (LGPL v2.1+). Read the file COPYING.LGPL for details. Some other files
+have a MIT/X11/BSD-style license. In combination the LGPL v2.1+ applies to
+FFmpeg.
+
+Some optional parts of FFmpeg are licensed under the GNU General Public License
+version 2 or later (GPL v2+). See the file COPYING.GPL for details. None of
+these parts are used by default, you have to explicitly pass --enable-gpl to
+configure to activate them. In this case, FFmpeg's license changes to GPL v2+.
+
+Specifically, the GPL parts of FFmpeg are
+
+- libpostproc
+- some x86 and AltiVec optimizations in libswscale
+- optional x86 optimizations in the files
+  libavcodec/x86/h264_deblock_sse2.asm
+  libavcodec/x86/h264_idct_sse2.asm
+  libavcodec/x86/idct_mmx.c
+- the AC-3 decoder in libavcodec/ac3dec.c
+- the X11 grabber in libavdevice/x11grab.c
+
+Some external libraries, e.g. libx264, are under GPL and can be used in
+conjunction with FFmpeg. They require --enable-gpl to be passed to configure
+as well.
+
+The nonfree external libraries libamrnb and libamrwb can be hooked up in FFmpeg.
+You need to pass --enable-nonfree to configure to enable them. Employ this
+option with care as FFmpeg then becomes nonfree and unredistributable.
+
+There are a handful of files under other licensing terms, namely:
+
+* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
+  are taken from libjpeg, see the top of the files for licensing details.
+
+* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation
+  Group with all rights reserved. It is only used to create a DCT test program
+  and not compiled into libavcodec.
diff --git a/README b/README
index 5a5cb16a96..e907e90223 100644
--- a/README
+++ b/README
@@ -9,32 +9,4 @@ FFmpeg README
 2) Licensing
 ------------
 
-* Read the file COPYING.LGPL. FFmpeg and the associated libraries EXCEPT
-  for libpostproc are licensed under the GNU Lesser General Public License.
-
-* libpostproc is distributed under the GNU General Public License, see the
-  file COPYING.GPL for details. Its compilation and use in FFmpeg is optional.
-
-* libswscale contains some optional processor-specific optimizations that are
-  distributed under the GNU General Public License.
-
-* The files libavcodec/x86/idct_mmx.c, libavcodec/x86/h264_deblock_sse2.asm
-  and libavcodec/x86/h264_idct_sse2.asm are distributed under the GNU General
-  Public License. They are strictly optimizations and their use is optional.
-
-* The file libavcodec/ac3dec.c is distributed under the GNU General Public
-  License.  In order for (E-)AC-3 decoding to work you need to enable GPL
-  components.
-
-* The file libavdevice/x11grab.c is distributed under the GNU General
-  Public License. X11 grabbing is optional.
-
-* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
-  are taken from libjpeg, see the top of the files for licensing details.
-
-* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation
-  Group with all rights reserved. It is only used to create a DCT test program
-  and not compiled into libavcodec.
-
-* Some external libraries are under GPL. If you wish to use them with FFmpeg,
-  you have to configure FFmpeg as GPL as well.
+* See the LICENSE file.

From 241c55aabe3dfeef2caf413bb2ec37504f488056 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Apr 2009 10:15:04 +0000
Subject: [PATCH 011/315] Merge fix for GPL code that erroneously made it into
 the LGPL build.

Originally committed as revision 18490 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libswscale/swscale_template.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 3262b6ee85..6f2e243052 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -2219,7 +2219,7 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
     }
     else // fast bilinear upscale / crap downscale
     {
-#if ARCH_X86
+#if ARCH_X86 && CONFIG_GPL
 #if HAVE_MMX2
         int i;
 #if defined(PIC)
@@ -2492,7 +2492,7 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
     }
     else // fast bilinear upscale / crap downscale
     {
-#if ARCH_X86
+#if ARCH_X86 && CONFIG_GPL
 #if HAVE_MMX2
         int i;
 #if defined(PIC)

From 266f6af570c87af4c46f8207aa43e1d88422ed73 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Apr 2009 10:15:48 +0000
Subject: [PATCH 012/315] Merge LGPL relicensing of AltiVec optimizations.

Originally committed as revision 18491 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 LICENSE                               |  2 +-
 libswscale/swscale_altivec_template.c | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/LICENSE b/LICENSE
index 8c74356708..286cd9016d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -11,7 +11,7 @@ configure to activate them. In this case, FFmpeg's license changes to GPL v2+.
 Specifically, the GPL parts of FFmpeg are
 
 - libpostproc
-- some x86 and AltiVec optimizations in libswscale
+- some x86 optimizations in libswscale
 - optional x86 optimizations in the files
   libavcodec/x86/h264_deblock_sse2.asm
   libavcodec/x86/h264_idct_sse2.asm
diff --git a/libswscale/swscale_altivec_template.c b/libswscale/swscale_altivec_template.c
index a008b966e8..0bf620bc83 100644
--- a/libswscale/swscale_altivec_template.c
+++ b/libswscale/swscale_altivec_template.c
@@ -6,18 +6,18 @@
  *
  * This file is part of FFmpeg.
  *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with FFmpeg; if not, write to the Free Software
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 

From b0b57fa13b4b0f1d4c0d0994eda1ae208257ed14 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Apr 2009 10:23:10 +0000
Subject: [PATCH 013/315] Merge replacement of MPEG group reference DCT code.

Originally committed as revision 18492 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 LICENSE               |   4 --
 doc/TODO              |   1 -
 libavcodec/Makefile   |   3 +-
 libavcodec/dct-test.c |  64 ++++++++---------
 libavcodec/dctref.c   | 121 ++++++++++++++++++++++++++++++++
 libavcodec/fdctref.c  | 157 ------------------------------------------
 6 files changed, 154 insertions(+), 196 deletions(-)
 create mode 100644 libavcodec/dctref.c
 delete mode 100644 libavcodec/fdctref.c

diff --git a/LICENSE b/LICENSE
index 286cd9016d..7cb34d7597 100644
--- a/LICENSE
+++ b/LICENSE
@@ -31,7 +31,3 @@ There are a handful of files under other licensing terms, namely:
 
 * The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
   are taken from libjpeg, see the top of the files for licensing details.
-
-* The file libavcodec/fdctref.c is copyrighted by the MPEG Software Simulation
-  Group with all rights reserved. It is only used to create a DCT test program
-  and not compiled into libavcodec.
diff --git a/doc/TODO b/doc/TODO
index 6c0a824107..f03270ec13 100644
--- a/doc/TODO
+++ b/doc/TODO
@@ -81,7 +81,6 @@ unassigned TODO: (unordered)
 - add support for using mplayers video filters to ffmpeg
 - H264 encoder
 - per MB ratecontrol (so VCD and such do work better)
-- replace/rewrite libavcodec/fdctref.c
 - write a script which iteratively changes all functions between always_inline and noinline and benchmarks the result to find the best set of inlined functions
 - convert all the non SIMD asm into small asm vs. C testcases and submit them to the gcc devels so they can improve gcc
 - generic audio mixing API
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index afa5fac152..5067354da1 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -533,5 +533,4 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
 
 include $(SUBDIR)../subdir.mak
 
-$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o $(SUBDIR)aandcttab.o
-$(SUBDIR)fft-test$(EXESUF): $(SUBDIR)fdctref.o
+$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index fef142fe94..48e7c5a091 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -46,9 +46,9 @@
 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
 
 /* reference fdct/idct */
-void fdct(DCTELEM *block);
-void idct(DCTELEM *block);
-void init_fdct(void);
+void ff_ref_fdct(DCTELEM *block);
+void ff_ref_idct(DCTELEM *block);
+void ff_ref_dct_init(void);
 
 void ff_mmx_idct(DCTELEM *data);
 void ff_mmxext_idct(DCTELEM *data);
@@ -90,57 +90,57 @@ struct algo {
 static int cpu_flags;
 
 struct algo algos[] = {
-  {"REF-DBL",         0, fdct,               fdct, NO_PERM},
-  {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
-  {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
-  {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
-  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
-  {"REF-DBL",         1, idct,               idct, NO_PERM},
-  {"INT",             1, j_rev_dct,          idct, MMX_PERM},
-  {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
+  {"REF-DBL",         0, ff_ref_fdct,        ff_ref_fdct, NO_PERM},
+  {"FAAN",            0, ff_faandct,         ff_ref_fdct, FAAN_SCALE},
+  {"FAANI",           1, ff_faanidct,        ff_ref_idct, NO_PERM},
+  {"IJG-AAN-INT",     0, fdct_ifast,         ff_ref_fdct, SCALE_PERM},
+  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
+  {"REF-DBL",         1, ff_ref_idct,        ff_ref_idct, NO_PERM},
+  {"INT",             1, j_rev_dct,          ff_ref_idct, MMX_PERM},
+  {"SIMPLE-C",        1, ff_simple_idct,     ff_ref_idct, NO_PERM},
 
 #if HAVE_MMX
-  {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, FF_MM_MMX},
+  {"MMX",             0, ff_fdct_mmx,        ff_ref_fdct, NO_PERM, FF_MM_MMX},
 #if HAVE_MMX2
-  {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, FF_MM_MMXEXT},
-  {"SSE2",            0, ff_fdct_sse2,       fdct, NO_PERM, FF_MM_SSE2},
+  {"MMX2",            0, ff_fdct_mmx2,       ff_ref_fdct, NO_PERM, FF_MM_MMXEXT},
+  {"SSE2",            0, ff_fdct_sse2,       ff_ref_fdct, NO_PERM, FF_MM_SSE2},
 #endif
 
 #if CONFIG_GPL
-  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, FF_MM_MMX},
-  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, FF_MM_MMXEXT},
+  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        ff_ref_idct, MMX_PERM, FF_MM_MMX},
+  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     ff_ref_idct, MMX_PERM, FF_MM_MMXEXT},
 #endif
-  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
-  {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, FF_MM_MMX},
-  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, FF_MM_MMXEXT},
-  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, FF_MM_SSE2},
+  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
+  {"XVID-MMX",        1, ff_idct_xvid_mmx,   ff_ref_idct, NO_PERM, FF_MM_MMX},
+  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  ff_ref_idct, NO_PERM, FF_MM_MMXEXT},
+  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
 #endif
 
 #if HAVE_ALTIVEC
-  {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, FF_MM_ALTIVEC},
+  {"altivecfdct",     0, fdct_altivec,       ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
 #endif
 
 #if ARCH_BFIN
-  {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
-  {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
+  {"BFINfdct",        0, ff_bfin_fdct,       ff_ref_fdct, NO_PERM},
+  {"BFINidct",        1, ff_bfin_idct,       ff_ref_idct, NO_PERM},
 #endif
 
 #if ARCH_ARM
-  {"SIMPLE-ARM",      1, simple_idct_ARM,    idct, NO_PERM },
-  {"INT-ARM",         1, j_rev_dct_ARM,      idct, MMX_PERM },
+  {"SIMPLE-ARM",      1, simple_idct_ARM,    ff_ref_idct, NO_PERM },
+  {"INT-ARM",         1, j_rev_dct_ARM,      ff_ref_idct, MMX_PERM },
 #if HAVE_ARMV5TE
-  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, idct, NO_PERM },
+  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
 #endif
 #if HAVE_ARMV6
-  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, idct, MMX_PERM },
+  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
 #endif
 #if HAVE_NEON
-  {"SIMPLE-NEON",     1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
+  {"SIMPLE-NEON",     1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
 #endif
 #endif /* ARCH_ARM */
 
 #if ARCH_ALPHA
-  {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  idct, NO_PERM },
+  {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  ff_ref_idct, NO_PERM },
 #endif
 
   { 0 }
@@ -222,7 +222,7 @@ void dct_error(const char *name, int is_idct,
             for(i=0;i<64;i++)
                 block1[i] = (random() % 512) -256;
             if (is_idct){
-                fdct(block1);
+                ff_ref_fdct(block1);
 
                 for(i=0;i<64;i++)
                     block1[i]>>=3;
@@ -336,7 +336,7 @@ void dct_error(const char *name, int is_idct,
         for(i=0;i<64;i++)
             block1[i] = (random() % 512) -256;
         if (is_idct){
-            fdct(block1);
+            ff_ref_fdct(block1);
 
             for(i=0;i<64;i++)
                 block1[i]>>=3;
@@ -559,7 +559,7 @@ int main(int argc, char **argv)
     int test=1;
     cpu_flags = mm_support();
 
-    init_fdct();
+    ff_ref_dct_init();
     idct_mmx_init();
 
     for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
diff --git a/libavcodec/dctref.c b/libavcodec/dctref.c
new file mode 100644
index 0000000000..faad057a9a
--- /dev/null
+++ b/libavcodec/dctref.c
@@ -0,0 +1,121 @@
+/*
+ * reference discrete cosine transform (double precision)
+ * Copyright (C) 2009 Dylan Yudaken
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/dctref.c
+ * reference discrete cosine transform (double precision)
+ *
+ * @author Dylan Yudaken (dyudaken at gmail)
+ *
+ * @note This file could be optimized a lot, but it is meant as a
+ * reference, so readability takes priority over speed.
+ */
+
+#include "libavutil/mathematics.h"
+static double coefficients[8 * 8];
+
+/**
+ * Fill the 8x8 coefficients table used by the double precision
+ * reference transforms ff_ref_fdct() and ff_ref_idct().
+ */
+av_cold void ff_ref_dct_init(void)
+{
+    unsigned int i, j;
+
+    for (j = 0; j < 8; ++j) {
+        coefficients[j] = sqrt(0.125); /* first row (offsets 0..7) is constant */
+        for (i = 8; i < 64; i += 8) { /* i is the row offset of rows 1..7 */
+            coefficients[i + j] = 0.5 * cos(i * (j + 0.5) * M_PI / 64.0);
+        }
+    }
+}
+
+/**
+ * Transform 8x8 block of data with a double precision forward DCT <br>
+ * This is a reference implementation; the result is written back in place.
+ *
+ * @param block pointer to 8x8 block of data to transform (64 values, row-major)
+ */
+void ff_ref_fdct(short *block)
+{
+    /* implement the equation: block = coefficients * block * coefficients' */
+
+    unsigned int i, j, k;
+    double out[8 * 8];
+
+    /* out = coefficients * block */
+    for (i = 0; i < 64; i += 8) { /* i is a row offset (row * 8) */
+        for (j = 0; j < 8; ++j) {
+            double tmp = 0;
+            for (k = 0; k < 8; ++k) {
+                tmp += coefficients[i + k] * block[k * 8 + j];
+            }
+            out[i + j] = tmp * 8; /* extra factor of 8 on top of the equation above */
+        }
+    }
+
+    /* block = out * (coefficients') */
+    for (j = 0; j < 8; ++j) {
+        for (i = 0; i < 64; i += 8) { /* i is a row offset (row * 8) */
+            double tmp = 0;
+            for (k = 0; k < 8; ++k) {
+                tmp += out[i + k] * coefficients[j * 8 + k];
+            }
+            block[i + j] = floor(tmp + 0.499999999999); /* bias just under 0.5 */
+        }
+    }
+}
+
+/**
+ * Transform 8x8 block of data with a double precision inverse DCT <br>
+ * This is a reference implementation; the result is written back in place.
+ *
+ * @param block pointer to 8x8 block of data to transform (64 values, row-major)
+ */
+void ff_ref_idct(short *block)
+{
+    /* implement the equation: block = (coefficients') * block * coefficients */
+
+    unsigned int i, j, k;
+    double out[8 * 8];
+
+    /* out = block * coefficients */
+    for (i = 0; i < 64; i += 8) { /* i is a row offset (row * 8) */
+        for (j = 0; j < 8; ++j) {
+            double tmp = 0;
+            for (k = 0; k < 8; ++k) {
+                tmp += block[i + k] * coefficients[k * 8 + j];
+            }
+            out[i + j] = tmp;
+        }
+    }
+
+    /* block = (coefficients') * out */
+    for (i = 0; i < 8; ++i) {
+        for (j = 0; j < 8; ++j) {
+            double tmp = 0;
+            for (k = 0; k < 64; k += 8) { /* k is a row offset into both tables */
+                tmp += coefficients[k + i] * out[k + j];
+            }
+            block[i * 8 + j] = floor(tmp + 0.5); /* round to nearest integer */
+        }
+    }
+}
diff --git a/libavcodec/fdctref.c b/libavcodec/fdctref.c
deleted file mode 100644
index 164883dcbc..0000000000
--- a/libavcodec/fdctref.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * @file libavcodec/fdctref.c
- * forward discrete cosine transform, double precision.
- */
-
-/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
-
-/*
- * Disclaimer of Warranty
- *
- * These software programs are available to the user without any license fee or
- * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
- * any and all warranties, whether express, implied, or statuary, including any
- * implied warranties or merchantability or of fitness for a particular
- * purpose.  In no event shall the copyright-holder be liable for any
- * incidental, punitive, or consequential damages of any kind whatsoever
- * arising from the use of these programs.
- *
- * This disclaimer of warranty extends to the user of these programs and user's
- * customers, employees, agents, transferees, successors, and assigns.
- *
- * The MPEG Software Simulation Group does not represent or warrant that the
- * programs furnished hereunder are free of infringement of any third-party
- * patents.
- *
- * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
- * are subject to royalty fees to patent holders.  Many of these patents are
- * general enough such that they are unavoidable regardless of implementation
- * design.
- */
-
-#include <math.h>
-
-#ifndef PI
-# ifdef M_PI
-#  define PI M_PI
-# else
-#  define PI 3.14159265358979323846
-# endif
-#endif
-
-/* global declarations */
-void init_fdct (void);
-void fdct (short *block);
-
-/* private data */
-static double c[8][8]; /* transform coefficients */
-
-void init_fdct(void)
-{
-  int i, j;
-  double s;
-
-  for (i=0; i<8; i++)
-  {
-    s = (i==0) ? sqrt(0.125) : 0.5;
-
-    for (j=0; j<8; j++)
-      c[i][j] = s * cos((PI/8.0)*i*(j+0.5));
-  }
-}
-
-void fdct(block)
-short *block;
-{
-        register int i, j;
-        double s;
-        double tmp[64];
-
-        for(i = 0; i < 8; i++)
-            for(j = 0; j < 8; j++)
-            {
-                    s = 0.0;
-
-/*
- *                     for(k = 0; k < 8; k++)
- *                         s += c[j][k] * block[8 * i + k];
- */
-                s += c[j][0] * block[8 * i + 0];
-                s += c[j][1] * block[8 * i + 1];
-                s += c[j][2] * block[8 * i + 2];
-                s += c[j][3] * block[8 * i + 3];
-                s += c[j][4] * block[8 * i + 4];
-                s += c[j][5] * block[8 * i + 5];
-                s += c[j][6] * block[8 * i + 6];
-                s += c[j][7] * block[8 * i + 7];
-
-                    tmp[8 * i + j] = s;
-            }
-
-        for(j = 0; j < 8; j++)
-            for(i = 0; i < 8; i++)
-            {
-                    s = 0.0;
-
-/*
- *                       for(k = 0; k < 8; k++)
- *                    s += c[i][k] * tmp[8 * k + j];
- */
-                s += c[i][0] * tmp[8 * 0 + j];
-                s += c[i][1] * tmp[8 * 1 + j];
-                s += c[i][2] * tmp[8 * 2 + j];
-                s += c[i][3] * tmp[8 * 3 + j];
-                s += c[i][4] * tmp[8 * 4 + j];
-                s += c[i][5] * tmp[8 * 5 + j];
-                s += c[i][6] * tmp[8 * 6 + j];
-                s += c[i][7] * tmp[8 * 7 + j];
-                s*=8.0;
-
-                    block[8 * i + j] = (short)floor(s + 0.499999);
-/*
- * reason for adding 0.499999 instead of 0.5:
- * s is quite often x.5 (at least for i and/or j = 0 or 4)
- * and setting the rounding threshold exactly to 0.5 leads to an
- * extremely high arithmetic implementation dependency of the result;
- * s being between x.5 and x.500001 (which is now incorrectly rounded
- * downwards instead of upwards) is assumed to occur less often
- * (if at all)
- */
-      }
-}
-
-/* perform IDCT matrix multiply for 8x8 coefficient block */
-
-void idct(block)
-short *block;
-{
-  int i, j, k, v;
-  double partial_product;
-  double tmp[64];
-
-  for (i=0; i<8; i++)
-    for (j=0; j<8; j++)
-    {
-      partial_product = 0.0;
-
-      for (k=0; k<8; k++)
-        partial_product+= c[k][j]*block[8*i+k];
-
-      tmp[8*i+j] = partial_product;
-    }
-
-  /* Transpose operation is integrated into address mapping by switching
-     loop order of i and j */
-
-  for (j=0; j<8; j++)
-    for (i=0; i<8; i++)
-    {
-      partial_product = 0.0;
-
-      for (k=0; k<8; k++)
-        partial_product+= c[k][i]*tmp[8*k+j];
-
-      v = (int) floor(partial_product+0.5);
-      block[8*i+j] = v;
-    }
-}

From 8d003e22caf05981473c5048184a062537be496c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 19 Apr 2009 13:22:08 +0000
Subject: [PATCH 014/315] Merge automatic addition of -fno-common to CFLAGS for
 Windows from trunk.

Originally committed as revision 18619 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index 281ce24a17..c77ee8268f 100755
--- a/configure
+++ b/configure
@@ -1572,6 +1572,7 @@ case $target_os in
         SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.def) -Wl,--enable-runtime-pseudo-reloc -Wl,--enable-auto-image-base'
         objformat="win32"
         enable dos_paths
+        check_cflags -fno-common
         if ! enabled x86_64; then
             check_cpp_condition _mingw.h "(__MINGW32_MAJOR_VERSION > 3) || (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
                 die "ERROR: MinGW runtime version must be >= 3.15."
@@ -1596,6 +1597,7 @@ case $target_os in
         SHFLAGS='-shared -Wl,--enable-auto-image-base'
         objformat="win32"
         enable dos_paths
+        check_cflags -fno-common
         ;;
     *-dos|freedos|opendos)
         disable ffplay ffserver vhook

From d8ef221893116c8eb5052602878480e2709e33ec Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 6 May 2009 12:10:27 +0000
Subject: [PATCH 015/315] Merge factorization of license check code from trunk.

Originally committed as revision 18756 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/configure b/configure
index c77ee8268f..73d80f9e25 100755
--- a/configure
+++ b/configure
@@ -1682,23 +1682,20 @@ EOF
 fi
 
 
-if ! enabled gpl; then
-    die_gpl_disabled(){
-        name=$1
-        shift
-        enabled_any $@ && die "$name is under GPL and --enable-gpl is not specified."
-    }
-    die_gpl_disabled "The Postprocessing code" postproc
-    die_gpl_disabled "libx264"                 libx264
-    die_gpl_disabled "libxvidcore"             libxvid
-    die_gpl_disabled "FAAD2"                   libfaad2
-    die_gpl_disabled "The X11 grabber"         x11grab
-    die_gpl_disabled "The software scaler"     swscale
-fi
+die_license_disabled() {
+    enabled $1 || enabled $2 && die "$2 is $1 and --enable-$1 is not specified."
+}
+
+die_license_disabled gpl libfaad2
+die_license_disabled gpl libx264
+die_license_disabled gpl libxvid
+die_license_disabled gpl postproc
+die_license_disabled gpl swscale
+die_license_disabled gpl x11grab
+
+die_license_disabled nonfree libamr_nb
+die_license_disabled nonfree libamr_wb
 
-if ! enabled nonfree && enabled_any libamr_nb libamr_wb; then
-    die "libamr is nonfree and --enable-nonfree is not specified."
-fi
 
 check_deps $ARCH_EXT_LIST
 

From 4f3ce007046580d04306430b43fa8985d4285548 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 6 May 2009 12:12:05 +0000
Subject: [PATCH 016/315] Merge marking of libfaac as non-free from trunk.

Originally committed as revision 18757 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 LICENSE   | 7 ++++---
 configure | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/LICENSE b/LICENSE
index 7cb34d7597..7c0a3e71b6 100644
--- a/LICENSE
+++ b/LICENSE
@@ -23,9 +23,10 @@ Some external libraries, e.g. libx264, are under GPL and can be used in
 conjunction with FFmpeg. They require --enable-gpl to be passed to configure
 as well.
 
-The nonfree external libraries libamrnb and libamrwb can be hooked up in FFmpeg.
-You need to pass --enable-nonfree to configure to enable them. Employ this
-option with care as FFmpeg then becomes nonfree and unredistributable.
+The nonfree external libraries libamrnb, libamrwb and libfaac can be hooked up
+in FFmpeg. You need to pass --enable-nonfree to configure to enable them. Employ
+this option with care as FFmpeg then becomes nonfree and unredistributable.
+Note that libfaac claims to be LGPL, but is not.
 
 There are a handful of files under other licensing terms, namely:
 
diff --git a/configure b/configure
index 73d80f9e25..0db8a45888 100755
--- a/configure
+++ b/configure
@@ -1695,6 +1695,7 @@ die_license_disabled gpl x11grab
 
 die_license_disabled nonfree libamr_nb
 die_license_disabled nonfree libamr_wb
+die_license_disabled nonfree libfaac
 
 
 check_deps $ARCH_EXT_LIST

From 2f14399e40ce521e152947637af477ba42164338 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Wed, 6 May 2009 15:29:59 +0000
Subject: [PATCH 017/315] Backport r18214 from trunk.

Originally committed as revision 18759 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 0db8a45888..5f9b33234f 100755
--- a/configure
+++ b/configure
@@ -1569,7 +1569,7 @@ case $target_os in
         SLIB_INSTALL_EXTRA_CMD='-install -m 644 $(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib) "$(SHLIBDIR)/$(SLIBNAME:$(SLIBSUF)=.lib)"; \
             install -m 644 $(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib) "$(SHLIBDIR)/$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)"'
         SLIB_UNINSTALL_EXTRA_CMD='rm -f "$(SHLIBDIR)/$(SLIBNAME:$(SLIBSUF)=.lib)"'
-        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.def) -Wl,--enable-runtime-pseudo-reloc -Wl,--enable-auto-image-base'
+        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.def) -Wl,--out-implib,$(SUBDIR)lib$(SLIBNAME:$(SLIBSUF)=.dll.a) -Wl,--enable-runtime-pseudo-reloc -Wl,--enable-auto-image-base'
         objformat="win32"
         enable dos_paths
         check_cflags -fno-common

From eade41f3ec1505404866858db28ce2fac091963a Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Wed, 6 May 2009 15:31:02 +0000
Subject: [PATCH 018/315] Backport r17995 from trunk.

Originally committed as revision 18760 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 5f9b33234f..9e0231918e 100755
--- a/configure
+++ b/configure
@@ -1565,7 +1565,7 @@ case $target_os in
         SLIBSUF=".dll"
         SLIBNAME_WITH_VERSION='$(SLIBPREF)$(FULLNAME)-$(LIBVERSION)$(SLIBSUF)'
         SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(FULLNAME)-$(LIBMAJOR)$(SLIBSUF)'
-        SLIB_EXTRA_CMD='-lib /machine:$(LIBTARGET) /def:$$(@:$(SLIBSUF)=.def) /out:$(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)'
+        SLIB_EXTRA_CMD='-lib.exe /machine:$(LIBTARGET) /def:$$(@:$(SLIBSUF)=.def) /out:$(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)'
         SLIB_INSTALL_EXTRA_CMD='-install -m 644 $(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib) "$(SHLIBDIR)/$(SLIBNAME:$(SLIBSUF)=.lib)"; \
             install -m 644 $(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib) "$(SHLIBDIR)/$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)"'
         SLIB_UNINSTALL_EXTRA_CMD='rm -f "$(SHLIBDIR)/$(SLIBNAME:$(SLIBSUF)=.lib)"'

From 7056dd763fc7a5c04ae70372f7bddc7dc7b80042 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Thu, 7 May 2009 22:46:42 +0000
Subject: [PATCH 019/315] Revert unapproved changes.

Originally committed as revision 18770 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 9e0231918e..0db8a45888 100755
--- a/configure
+++ b/configure
@@ -1565,11 +1565,11 @@ case $target_os in
         SLIBSUF=".dll"
         SLIBNAME_WITH_VERSION='$(SLIBPREF)$(FULLNAME)-$(LIBVERSION)$(SLIBSUF)'
         SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(FULLNAME)-$(LIBMAJOR)$(SLIBSUF)'
-        SLIB_EXTRA_CMD='-lib.exe /machine:$(LIBTARGET) /def:$$(@:$(SLIBSUF)=.def) /out:$(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)'
+        SLIB_EXTRA_CMD='-lib /machine:$(LIBTARGET) /def:$$(@:$(SLIBSUF)=.def) /out:$(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)'
         SLIB_INSTALL_EXTRA_CMD='-install -m 644 $(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib) "$(SHLIBDIR)/$(SLIBNAME:$(SLIBSUF)=.lib)"; \
             install -m 644 $(SUBDIR)$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib) "$(SHLIBDIR)/$(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.lib)"'
         SLIB_UNINSTALL_EXTRA_CMD='rm -f "$(SHLIBDIR)/$(SLIBNAME:$(SLIBSUF)=.lib)"'
-        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.def) -Wl,--out-implib,$(SUBDIR)lib$(SLIBNAME:$(SLIBSUF)=.dll.a) -Wl,--enable-runtime-pseudo-reloc -Wl,--enable-auto-image-base'
+        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.def) -Wl,--enable-runtime-pseudo-reloc -Wl,--enable-auto-image-base'
         objformat="win32"
         enable dos_paths
         check_cflags -fno-common

From 3499f0f3e399c59d77bf15b595068e5f0581f104 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 10 May 2009 11:02:03 +0000
Subject: [PATCH 020/315] Merge fix for license check function from trunk.

Originally committed as revision 18782 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 0db8a45888..0dc58a8d39 100755
--- a/configure
+++ b/configure
@@ -1683,7 +1683,7 @@ fi
 
 
 die_license_disabled() {
-    enabled $1 || enabled $2 && die "$2 is $1 and --enable-$1 is not specified."
+    enabled $1 || { enabled $2 && die "$2 is $1 and --enable-$1 is not specified."; }
 }
 
 die_license_disabled gpl libfaad2

From d6c23ec06a3162160165c0883cc55275e9f1a60d Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 23 May 2009 12:04:11 +0000
Subject: [PATCH 021/315] Merge explanation of changelog sort order from trunk.

Originally committed as revision 18913 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Changelog b/Changelog
index b8791902f1..0ae43ee6dc 100644
--- a/Changelog
+++ b/Changelog
@@ -1,3 +1,7 @@
+Entries are sorted chronologically from oldest to youngest within each release,
+releases are sorted from youngest to oldest.
+
+
 version 0.5:
 
 - The "device" muxers and demuxers are now in a new libavdevice library

From 0ae7dcae2ce885b42f3944584ca510252a3ce316 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 23 May 2009 12:18:53 +0000
Subject: [PATCH 022/315] Mention post 0.5 commits in the changelog.

Originally committed as revision 18914 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Changelog b/Changelog
index 0ae43ee6dc..53c3e66bb3 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,14 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.1:
+
+- build system updates
+- documentation updates
+- fix for GPL code in libswscale that was erroneously activated
+- AltiVec code in libswscale is now LGPL
+
+
 version 0.5:
 
 - The "device" muxers and demuxers are now in a new libavdevice library

From a4d8ebfaa1ff87a6c2460e4aceaeefb2182c67df Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 23 May 2009 12:58:44 +0000
Subject: [PATCH 023/315] Merge GPL --> LGPL conversion of AC-3 decoder from
 trunk.

Originally committed as revision 18915 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog           |   2 +
 LICENSE             |   1 -
 configure           |   2 -
 libavcodec/ac3dec.c | 122 +++++++++++++++++++++-----------------------
 4 files changed, 60 insertions(+), 67 deletions(-)

diff --git a/Changelog b/Changelog
index 53c3e66bb3..8dc20b45d1 100644
--- a/Changelog
+++ b/Changelog
@@ -8,6 +8,8 @@ version 0.5.1:
 - documentation updates
 - fix for GPL code in libswscale that was erroneously activated
 - AltiVec code in libswscale is now LGPL
+- remaining GPL parts in AC-3 decoder converted to LGPL
+
 
 
 version 0.5:
diff --git a/LICENSE b/LICENSE
index 7c0a3e71b6..929f65bebc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -16,7 +16,6 @@ Specifically, the GPL parts of FFmpeg are
   libavcodec/x86/h264_deblock_sse2.asm
   libavcodec/x86/h264_idct_sse2.asm
   libavcodec/x86/idct_mmx.c
-- the AC-3 decoder in libavcodec/ac3dec.c
 - the X11 grabber in libavdevice/x11grab.c
 
 Some external libraries, e.g. libx264, are under GPL and can be used in
diff --git a/configure b/configure
index 0dc58a8d39..6ffc2b019b 100755
--- a/configure
+++ b/configure
@@ -990,7 +990,6 @@ oldscaler_deps="!swscale"
 
 # decoders / encoders
 aac_decoder_select="fft mdct"
-ac3_decoder_deps="gpl"
 ac3_decoder_select="fft mdct"
 atrac3_decoder_select="fft mdct"
 cavs_decoder_select="golomb"
@@ -999,7 +998,6 @@ cscd_decoder_suggest="zlib"
 dca_decoder_select="fft mdct"
 dnxhd_encoder_select="aandct"
 dxa_decoder_select="zlib"
-eac3_decoder_deps="gpl"
 eac3_decoder_select="fft mdct"
 eatgq_decoder_select="aandct"
 eatqi_decoder_select="aandct"
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 766b262e49..7a2042c059 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -7,24 +7,19 @@
  * Copyright (c) 2007-2008 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  * Copyright (c) 2007 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * Portions of this code are derived from liba52
- * http://liba52.sourceforge.net
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
+ * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
+ * Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public
+ * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
@@ -438,12 +433,12 @@ static void calc_transform_coeffs_cpl(AC3DecodeContext *s)
  * Grouped mantissas for 3-level 5-level and 11-level quantization
  */
 typedef struct {
-    int b1_mant[3];
-    int b2_mant[3];
-    int b4_mant[2];
-    int b1ptr;
-    int b2ptr;
-    int b4ptr;
+    int b1_mant[2];
+    int b2_mant[2];
+    int b4_mant;
+    int b1;
+    int b2;
+    int b4;
 } mant_groups;
 
 /**
@@ -452,73 +447,72 @@ typedef struct {
  */
 static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, mant_groups *m)
 {
+    int start_freq = s->start_freq[ch_index];
+    int end_freq = s->end_freq[ch_index];
+    uint8_t *baps = s->bap[ch_index];
+    int8_t *exps = s->dexps[ch_index];
+    int *coeffs = s->fixed_coeffs[ch_index];
     GetBitContext *gbc = &s->gbc;
-    int i, gcode, tbap, start, end;
-    uint8_t *exps;
-    uint8_t *bap;
-    int *coeffs;
+    int freq;
 
-    exps = s->dexps[ch_index];
-    bap = s->bap[ch_index];
-    coeffs = s->fixed_coeffs[ch_index];
-    start = s->start_freq[ch_index];
-    end = s->end_freq[ch_index];
-
-    for (i = start; i < end; i++) {
-        tbap = bap[i];
-        switch (tbap) {
+    for(freq = start_freq; freq < end_freq; freq++){
+        int bap = baps[freq];
+        int mantissa;
+        switch(bap){
             case 0:
-                coeffs[i] = (av_lfg_get(&s->dith_state) & 0x7FFFFF) - 0x400000;
+                mantissa = (av_lfg_get(&s->dith_state) & 0x7FFFFF) - 0x400000;
                 break;
-
             case 1:
-                if(m->b1ptr > 2) {
-                    gcode = get_bits(gbc, 5);
-                    m->b1_mant[0] = b1_mantissas[gcode][0];
-                    m->b1_mant[1] = b1_mantissas[gcode][1];
-                    m->b1_mant[2] = b1_mantissas[gcode][2];
-                    m->b1ptr = 0;
+                if(m->b1){
+                    m->b1--;
+                    mantissa = m->b1_mant[m->b1];
+                }
+                else{
+                    int bits      = get_bits(gbc, 5);
+                    mantissa      = b1_mantissas[bits][0];
+                    m->b1_mant[1] = b1_mantissas[bits][1];
+                    m->b1_mant[0] = b1_mantissas[bits][2];
+                    m->b1         = 2;
                 }
-                coeffs[i] = m->b1_mant[m->b1ptr++];
                 break;
-
             case 2:
-                if(m->b2ptr > 2) {
-                    gcode = get_bits(gbc, 7);
-                    m->b2_mant[0] = b2_mantissas[gcode][0];
-                    m->b2_mant[1] = b2_mantissas[gcode][1];
-                    m->b2_mant[2] = b2_mantissas[gcode][2];
-                    m->b2ptr = 0;
+                if(m->b2){
+                    m->b2--;
+                    mantissa = m->b2_mant[m->b2];
+                }
+                else{
+                    int bits      = get_bits(gbc, 7);
+                    mantissa      = b2_mantissas[bits][0];
+                    m->b2_mant[1] = b2_mantissas[bits][1];
+                    m->b2_mant[0] = b2_mantissas[bits][2];
+                    m->b2         = 2;
                 }
-                coeffs[i] = m->b2_mant[m->b2ptr++];
                 break;
-
             case 3:
-                coeffs[i] = b3_mantissas[get_bits(gbc, 3)];
+                mantissa = b3_mantissas[get_bits(gbc, 3)];
                 break;
-
             case 4:
-                if(m->b4ptr > 1) {
-                    gcode = get_bits(gbc, 7);
-                    m->b4_mant[0] = b4_mantissas[gcode][0];
-                    m->b4_mant[1] = b4_mantissas[gcode][1];
-                    m->b4ptr = 0;
+                if(m->b4){
+                    m->b4 = 0;
+                    mantissa = m->b4_mant;
+                }
+                else{
+                    int bits   = get_bits(gbc, 7);
+                    mantissa   = b4_mantissas[bits][0];
+                    m->b4_mant = b4_mantissas[bits][1];
+                    m->b4      = 1;
                 }
-                coeffs[i] = m->b4_mant[m->b4ptr++];
                 break;
-
             case 5:
-                coeffs[i] = b5_mantissas[get_bits(gbc, 4)];
+                mantissa = b5_mantissas[get_bits(gbc, 4)];
                 break;
-
-            default: {
-                /* asymmetric dequantization */
-                int qlevel = quantization_tab[tbap];
-                coeffs[i] = get_sbits(gbc, qlevel) << (24 - qlevel);
+            default: /* 6 to 15 */
+                mantissa = get_bits(gbc, quantization_tab[bap]);
+                /* Shift mantissa and sign-extend it. */
+                mantissa = (mantissa << (32-quantization_tab[bap]))>>8;
                 break;
-            }
         }
-        coeffs[i] >>= exps[i];
+        coeffs[freq] = mantissa >> exps[freq];
     }
 }
 
@@ -581,7 +575,7 @@ static void decode_transform_coeffs(AC3DecodeContext *s, int blk)
     int got_cplchan = 0;
     mant_groups m;
 
-    m.b1ptr = m.b2ptr = m.b4ptr = 3;
+    m.b1 = m.b2 = m.b4 = 0;
 
     for (ch = 1; ch <= s->channels; ch++) {
         /* transform coefficients for full-bandwidth channel */

From 41a4fd7a61e83127498c443840e696042152cf19 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 24 May 2009 22:14:10 +0000
Subject: [PATCH 024/315] Merge more verbose licensing information output.

Originally committed as revision 18931 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 6ffc2b019b..4084c869e7 100755
--- a/configure
+++ b/configure
@@ -2333,11 +2333,11 @@ for type in decoder encoder parser demuxer muxer protocol filter bsf indev outde
     echo
 done
 
-license="LGPL"
+license="LGPL version 2.1 or later"
 if enabled nonfree; then
-    license="unredistributable"
+    license="nonfree and unredistributable"
 elif enabled gpl; then
-    license="GPL"
+    license="GPL version 2 or later"
 fi
 
 echo "License: $license"

From df0ff1a02954a665a3c88536cf7dcf2f9cdca5a3 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 25 May 2009 09:17:17 +0000
Subject: [PATCH 025/315] Ignore generated files.

Originally committed as revision 18939 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5

From dd2089dfd8c3851f66f1dc5fd29d8030379bb2dc Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 7 Jun 2009 12:52:31 +0000
Subject: [PATCH 026/315] Merge (L)GPL upgrade code and related changes from
 trunk.

Originally committed as revision 19129 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 COPYING.GPL => COPYING.GPLv2     |   0
 COPYING.GPLv3                    | 674 +++++++++++++++++++++++++++++++
 COPYING.LGPL => COPYING.LGPLv2.1 |   0
 COPYING.LGPLv3                   | 165 ++++++++
 LICENSE                          |  28 +-
 cmdutils.c                       |  36 +-
 configure                        |   9 +
 7 files changed, 898 insertions(+), 14 deletions(-)
 rename COPYING.GPL => COPYING.GPLv2 (100%)
 create mode 100644 COPYING.GPLv3
 rename COPYING.LGPL => COPYING.LGPLv2.1 (100%)
 create mode 100644 COPYING.LGPLv3

diff --git a/COPYING.GPL b/COPYING.GPLv2
similarity index 100%
rename from COPYING.GPL
rename to COPYING.GPLv2
diff --git a/COPYING.GPLv3 b/COPYING.GPLv3
new file mode 100644
index 0000000000..94a9ed024d
--- /dev/null
+++ b/COPYING.GPLv3
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/COPYING.LGPL b/COPYING.LGPLv2.1
similarity index 100%
rename from COPYING.LGPL
rename to COPYING.LGPLv2.1
diff --git a/COPYING.LGPLv3 b/COPYING.LGPLv3
new file mode 100644
index 0000000000..65c5ca88a6
--- /dev/null
+++ b/COPYING.LGPLv3
@@ -0,0 +1,165 @@
+                   GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions.
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version.
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/LICENSE b/LICENSE
index 929f65bebc..5817658978 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,10 +1,13 @@
+FFmpeg:
+-------
+
 Most files in FFmpeg are under the GNU Lesser General Public License version 2.1
-or later (LGPL v2.1+). Read the file COPYING.LGPL for details. Some other files
-have a MIT/X11/BSD-style license. In combination the LGPL v2.1+ applies to
+or later (LGPL v2.1+). Read the file COPYING.LGPLv2.1 for details. Some other
+files have MIT/X11/BSD-style licenses. In combination the LGPL v2.1+ applies to
 FFmpeg.
 
 Some optional parts of FFmpeg are licensed under the GNU General Public License
-version 2 or later (GPL v2+). See the file COPYING.GPL for details. None of
+version 2 or later (GPL v2+). See the file COPYING.GPLv2 for details. None of
 these parts are used by default, you have to explicitly pass --enable-gpl to
 configure to activate them. In this case, FFmpeg's license changes to GPL v2+.
 
@@ -18,6 +21,20 @@ Specifically, the GPL parts of FFmpeg are
   libavcodec/x86/idct_mmx.c
 - the X11 grabber in libavdevice/x11grab.c
 
+There are a handful of files under other licensing terms, namely:
+
+* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
+  are taken from libjpeg, see the top of the files for licensing details.
+
+Should you, for whatever reason, prefer to use version 3 of the (L)GPL, then
+the configure parameter --enable-version3 will activate this licensing option
+for you. Read the file COPYING.LGPLv3 or, if you have enabled GPL parts,
+COPYING.GPLv3 to learn the exact legal terms that apply in this case.
+
+
+external libraries:
+-------------------
+
 Some external libraries, e.g. libx264, are under GPL and can be used in
 conjunction with FFmpeg. They require --enable-gpl to be passed to configure
 as well.
@@ -26,8 +43,3 @@ The nonfree external libraries libamrnb, libamrwb and libfaac can be hooked up
 in FFmpeg. You need to pass --enable-nonfree to configure to enable them. Employ
 this option with care as FFmpeg then becomes nonfree and unredistributable.
 Note that libfaac claims to be LGPL, but is not.
-
-There are a handful of files under other licensing terms, namely:
-
-* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
-  are taken from libjpeg, see the top of the files for licensing details.
diff --git a/cmdutils.c b/cmdutils.c
index d5fb6cff7b..108eaccdd1 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -309,14 +309,26 @@ void show_version(void) {
 
 void show_license(void)
 {
-#if CONFIG_NONFREE
     printf(
+#if CONFIG_NONFREE
     "This version of %s has nonfree parts compiled in.\n"
     "Therefore it is not legally redistributable.\n",
     program_name
-    );
+#elif CONFIG_GPLV3
+    "%s is free software; you can redistribute it and/or modify\n"
+    "it under the terms of the GNU General Public License as published by\n"
+    "the Free Software Foundation; either version 3 of the License, or\n"
+    "(at your option) any later version.\n"
+    "\n"
+    "%s is distributed in the hope that it will be useful,\n"
+    "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+    "GNU General Public License for more details.\n"
+    "\n"
+    "You should have received a copy of the GNU General Public License\n"
+    "along with %s.  If not, see <http://www.gnu.org/licenses/>.\n",
+    program_name, program_name, program_name
 #elif CONFIG_GPL
-    printf(
     "%s is free software; you can redistribute it and/or modify\n"
     "it under the terms of the GNU General Public License as published by\n"
     "the Free Software Foundation; either version 2 of the License, or\n"
@@ -331,9 +343,21 @@ void show_license(void)
     "along with %s; if not, write to the Free Software\n"
     "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n",
     program_name, program_name, program_name
-    );
+#elif CONFIG_LGPLV3
+    "%s is free software; you can redistribute it and/or modify\n"
+    "it under the terms of the GNU Lesser General Public License as published by\n"
+    "the Free Software Foundation; either version 3 of the License, or\n"
+    "(at your option) any later version.\n"
+    "\n"
+    "%s is distributed in the hope that it will be useful,\n"
+    "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+    "GNU Lesser General Public License for more details.\n"
+    "\n"
+    "You should have received a copy of the GNU Lesser General Public License\n"
+    "along with %s.  If not, see <http://www.gnu.org/licenses/>.\n",
+    program_name, program_name, program_name
 #else
-    printf(
     "%s is free software; you can redistribute it and/or\n"
     "modify it under the terms of the GNU Lesser General Public\n"
     "License as published by the Free Software Foundation; either\n"
@@ -348,8 +372,8 @@ void show_license(void)
     "License along with %s; if not, write to the Free Software\n"
     "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n",
     program_name, program_name, program_name
-    );
 #endif
+    );
 }
 
 void show_formats(void)
diff --git a/configure b/configure
index 4084c869e7..2a576e0c06 100755
--- a/configure
+++ b/configure
@@ -75,6 +75,7 @@ show_help(){
   echo "  --enable-shared          build shared libraries [no]"
   echo "  --enable-gpl             allow use of GPL code, the resulting libs"
   echo "                           and binaries will be under GPL [no]"
+  echo "  --enable-version3        upgrade (L)GPL to version 3 [no]"
   echo "  --enable-nonfree         allow use of nonfree code, the resulting libs"
   echo "                           and binaries will be unredistributable [no]"
   echo "  --disable-ffmpeg         disable ffmpeg build"
@@ -801,6 +802,7 @@ CONFIG_LIST="
     swscale
     vdpau
     vhook
+    version3
     x11grab
     zlib
 "
@@ -917,6 +919,8 @@ HAVE_LIST="
 
 # options emitted with CONFIG_ prefix but not available on command line
 CONFIG_EXTRA="
+    gplv3
+    lgplv3
     oldscaler
 "
 
@@ -1695,6 +1699,7 @@ die_license_disabled nonfree libamr_nb
 die_license_disabled nonfree libamr_wb
 die_license_disabled nonfree libfaac
 
+enabled version3 && { enabled gpl && enable gplv3 || enable lgplv3; }
 
 check_deps $ARCH_EXT_LIST
 
@@ -2336,6 +2341,10 @@ done
 license="LGPL version 2.1 or later"
 if enabled nonfree; then
     license="nonfree and unredistributable"
+elif enabled gplv3; then
+    license="GPL version 3 or later"
+elif enabled lgplv3; then
+    license="LGPL version 3 or later"
 elif enabled gpl; then
     license="GPL version 2 or later"
 fi

From 4fcef88c4d015da8e8ba8c2298c24d73eb95e73c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 7 Jun 2009 16:14:50 +0000
Subject: [PATCH 027/315] Merge recent libamr changes from trunk, as
 preparation for OpenCORE support.

Originally committed as revision 19131 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/libamr.c | 715 ++++++++++++++------------------------------
 1 file changed, 221 insertions(+), 494 deletions(-)

diff --git a/libavcodec/libamr.c b/libavcodec/libamr.c
index 6a9de50fdc..cfbd1bbe73 100644
--- a/libavcodec/libamr.c
+++ b/libavcodec/libamr.c
@@ -25,22 +25,13 @@
  * This code implements both an AMR-NarrowBand (AMR-NB) and an AMR-WideBand
  * (AMR-WB) audio encoder/decoder through external reference code from
  * http://www.3gpp.org/. The license of the code from 3gpp is unclear so you
- * have to download the code separately. Two versions exists: One fixed-point
- * and one floating-point. For some reason the float encoder is significantly
- * faster at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip
- * at MR102). Both float and fixed point are supported for AMR-NB, but only
- * float for AMR-WB.
+ * have to download the code separately.
  *
  * \section AMR-NB
  *
- * \subsection Float
  * The float version (default) can be downloaded from:
  * http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-610.zip
  *
- * \subsection Fixed-point
- * The fixed-point (TS26.073) can be downloaded from:
- * http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-600.zip
- *
  * \subsection Specification
  * The specification for AMR-NB can be found in TS 26.071
  * (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
@@ -48,14 +39,9 @@
  *
  * \section AMR-WB
  *
- * \subsection Float
  * The reference code can be downloaded from:
  * http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-600.zip
  *
- * \subsection Fixed-point
- * If someone wants to use the fixed point version it can be downloaded from:
- * http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip.
- *
  * \subsection Specification
  * The specification for AMR-WB can be found in TS 26.171
  * (http://www.3gpp.org/ftp/Specs/html-info/26171.htm) and some other
@@ -65,31 +51,30 @@
 
 #include "avcodec.h"
 
-#if CONFIG_LIBAMR_NB_FIXED
+static void amr_decode_fix_avctx(AVCodecContext *avctx)
+{
+    const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
 
-#define MMS_IO
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 8000 * is_amr_wb;
 
-#include "amr/sp_dec.h"
-#include "amr/d_homing.h"
-#include "amr/typedef.h"
-#include "amr/sp_enc.h"
-#include "amr/sid_sync.h"
-#include "amr/e_homing.h"
+    if (!avctx->channels)
+        avctx->channels = 1;
+
+    avctx->frame_size = 160 * is_amr_wb;
+    avctx->sample_fmt = SAMPLE_FMT_S16;
+}
+
+#if CONFIG_LIBAMR_NB
 
-#else
 #include <amrnb/interf_dec.h>
 #include <amrnb/interf_enc.h>
-#endif
 
 static const char nb_bitrate_unsupported[] =
     "bitrate not supported: use one of 4.75k, 5.15k, 5.9k, 6.7k, 7.4k, 7.95k, 10.2k or 12.2k\n";
-static const char wb_bitrate_unsupported[] =
-    "bitrate not supported: use one of 6.6k, 8.85k, 12.65k, 14.25k, 15.85k, 18.25k, 19.85k, 23.05k, or 23.85k\n";
 
-/* Common code for fixed and float version*/
-typedef struct AMR_bitrates
-{
-    int rate;
+typedef struct AMR_bitrates {
+    int       rate;
     enum Mode mode;
 } AMR_bitrates;
 
@@ -97,287 +82,44 @@ typedef struct AMR_bitrates
 static int getBitrateMode(int bitrate)
 {
     /* make the correspondance between bitrate and mode */
-    AMR_bitrates rates[]={ {4750,MR475},
-                           {5150,MR515},
-                           {5900,MR59},
-                           {6700,MR67},
-                           {7400,MR74},
-                           {7950,MR795},
-                           {10200,MR102},
-                           {12200,MR122},
-                         };
+    AMR_bitrates rates[] = { { 4750, MR475},
+                             { 5150, MR515},
+                             { 5900, MR59},
+                             { 6700, MR67},
+                             { 7400, MR74},
+                             { 7950, MR795},
+                             {10200, MR102},
+                             {12200, MR122}, };
     int i;
 
-    for(i=0;i<8;i++)
-    {
-        if(rates[i].rate==bitrate)
-        {
+    for (i = 0; i < 8; i++)
+        if (rates[i].rate == bitrate)
             return rates[i].mode;
-        }
-    }
     /* no bitrate matching, return an error */
     return -1;
 }
 
-static void amr_decode_fix_avctx(AVCodecContext * avctx)
-{
-    const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
-
-    if(avctx->sample_rate == 0)
-    {
-        avctx->sample_rate = 8000 * is_amr_wb;
-    }
-
-    if(avctx->channels == 0)
-    {
-        avctx->channels = 1;
-    }
-
-    avctx->frame_size = 160 * is_amr_wb;
-    avctx->sample_fmt = SAMPLE_FMT_S16;
-}
-
-#if CONFIG_LIBAMR_NB_FIXED
-/* fixed point version*/
-/* frame size in serial bitstream file (frame type + serial stream + flags) */
-#define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5)
-
 typedef struct AMRContext {
-    int frameCount;
-    Speech_Decode_FrameState *speech_decoder_state;
-    enum RXFrameType rx_type;
-    enum Mode mode;
-    Word16 reset_flag;
-    Word16 reset_flag_old;
-
-    int enc_bitrate;
-    Speech_Encode_FrameState *enstate;
-    sid_syncState *sidstate;
-    enum TXFrameType tx_frametype;
+    int   frameCount;
+    void *decState;
+    int  *enstate;
+    int   enc_bitrate;
 } AMRContext;
 
-static av_cold int amr_nb_decode_init(AVCodecContext * avctx)
+static av_cold int amr_nb_decode_init(AVCodecContext *avctx)
 {
     AMRContext *s = avctx->priv_data;
 
-    s->frameCount=0;
-    s->speech_decoder_state=NULL;
-    s->rx_type = (enum RXFrameType)0;
-    s->mode= (enum Mode)0;
-    s->reset_flag=0;
-    s->reset_flag_old=1;
-
-    if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
-    {
-        av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
-        return -1;
-    }
-
-    amr_decode_fix_avctx(avctx);
-
-    if(avctx->channels > 1)
-    {
-        av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
-        return -1;
-    }
-
-    return 0;
-}
-
-static av_cold int amr_nb_encode_init(AVCodecContext * avctx)
-{
-    AMRContext *s = avctx->priv_data;
-
-    s->frameCount=0;
-    s->speech_decoder_state=NULL;
-    s->rx_type = (enum RXFrameType)0;
-    s->mode= (enum Mode)0;
-    s->reset_flag=0;
-    s->reset_flag_old=1;
-
-    if(avctx->sample_rate!=8000)
-    {
-        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
-        return -1;
-    }
-
-    if(avctx->channels!=1)
-    {
-        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
-        return -1;
-    }
-
-    avctx->frame_size=160;
-    avctx->coded_frame= avcodec_alloc_frame();
-
-    if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
-    {
-        av_log(avctx, AV_LOG_ERROR, "Speech_Encode_Frame_init error\n");
-        return -1;
-    }
-
-    if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
-    {
-        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
-        return -1;
-    }
-
-    return 0;
-}
-
-static av_cold int amr_nb_encode_close(AVCodecContext * avctx)
-{
-    AMRContext *s = avctx->priv_data;
-
-    Speech_Encode_Frame_exit(&s->enstate);
-    sid_sync_exit (&s->sidstate);
-    av_freep(&avctx->coded_frame);
-    return 0;
-}
-
-static av_cold int amr_nb_decode_close(AVCodecContext * avctx)
-{
-    AMRContext *s = avctx->priv_data;
-
-    Speech_Decode_Frame_exit(&s->speech_decoder_state);
-    return 0;
-}
-
-static int amr_nb_decode_frame(AVCodecContext * avctx,
-            void *data, int *data_size,
-            const uint8_t * buf, int buf_size)
-{
-    AMRContext *s = avctx->priv_data;
-    const uint8_t*amrData=buf;
-    int offset=0;
-    UWord8 toc, q, ft;
-    Word16 serial[SERIAL_FRAMESIZE];   /* coded bits */
-    Word16 *synth;
-    UWord8 *packed_bits;
-    static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
-    int i;
-
-    //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
-
-    synth=data;
-
-    toc=amrData[offset];
-    /* read rest of the frame based on ToC byte */
-    q  = (toc >> 2) & 0x01;
-    ft = (toc >> 3) & 0x0F;
-
-    //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
-
-    offset++;
-
-    packed_bits=amrData+offset;
-
-    offset+=packed_size[ft];
-
-    //Unsort and unpack bits
-    s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
-
-    //We have a new frame
-    s->frameCount++;
-
-    if (s->rx_type == RX_NO_DATA)
-    {
-        s->mode = s->speech_decoder_state->prev_mode;
-    }
-    else {
-        s->speech_decoder_state->prev_mode = s->mode;
-    }
-
-    /* if homed: check if this frame is another homing frame */
-    if (s->reset_flag_old == 1)
-    {
-        /* only check until end of first subframe */
-        s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
-    }
-    /* produce encoder homing frame if homed & input=decoder homing frame */
-    if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
-    {
-        for (i = 0; i < L_FRAME; i++)
-        {
-            synth[i] = EHF_MASK;
-        }
-    }
-    else
-    {
-        /* decode frame */
-        Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
-    }
-
-    //Each AMR-frame results in 160 16-bit samples
-    *data_size=160*2;
-
-    /* if not homed: check whether current frame is a homing frame */
-    if (s->reset_flag_old == 0)
-    {
-        /* check whole frame */
-        s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
-    }
-    /* reset decoder if current frame is a homing frame */
-    if (s->reset_flag != 0)
-    {
-        Speech_Decode_Frame_reset(s->speech_decoder_state);
-    }
-    s->reset_flag_old = s->reset_flag;
-
-    return offset;
-}
-
-
-static int amr_nb_encode_frame(AVCodecContext *avctx,
-                            unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
-{
-    short serial_data[250] = {0};
-    AMRContext *s = avctx->priv_data;
-    int written;
-
-    s->reset_flag = encoder_homing_frame_test(data);
-
-    Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
-
-    /* add frame type and mode */
-    sid_sync (s->sidstate, s->mode, &s->tx_frametype);
-
-    written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
-
-    if (s->reset_flag != 0)
-    {
-        Speech_Encode_Frame_reset(s->enstate);
-        sid_sync_reset(s->sidstate);
-    }
-    return written;
-}
-
-
-#elif CONFIG_LIBAMR_NB /* Float point version*/
-
-typedef struct AMRContext {
-    int frameCount;
-    void * decState;
-    int *enstate;
-    int enc_bitrate;
-} AMRContext;
-
-static av_cold int amr_nb_decode_init(AVCodecContext * avctx)
-{
-    AMRContext *s = avctx->priv_data;
-
-    s->frameCount=0;
-    s->decState=Decoder_Interface_init();
-    if(!s->decState)
-    {
+    s->frameCount = 0;
+    s->decState   = Decoder_Interface_init();
+    if (!s->decState) {
         av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
         return -1;
     }
 
     amr_decode_fix_avctx(avctx);
 
-    if(avctx->channels > 1)
-    {
+    if (avctx->channels > 1) {
         av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
         return -1;
     }
@@ -385,44 +127,7 @@ static av_cold int amr_nb_decode_init(AVCodecContext * avctx)
     return 0;
 }
 
-static av_cold int amr_nb_encode_init(AVCodecContext * avctx)
-{
-    AMRContext *s = avctx->priv_data;
-
-    s->frameCount=0;
-
-    if(avctx->sample_rate!=8000)
-    {
-        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
-        return -1;
-    }
-
-    if(avctx->channels!=1)
-    {
-        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
-        return -1;
-    }
-
-    avctx->frame_size=160;
-    avctx->coded_frame= avcodec_alloc_frame();
-
-    s->enstate=Encoder_Interface_init(0);
-    if(!s->enstate)
-    {
-        av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
-        return -1;
-    }
-
-    if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
-    {
-        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
-        return -1;
-    }
-
-    return 0;
-}
-
-static av_cold int amr_nb_decode_close(AVCodecContext * avctx)
+static av_cold int amr_nb_decode_close(AVCodecContext *avctx)
 {
     AMRContext *s = avctx->priv_data;
 
@@ -430,72 +135,39 @@ static av_cold int amr_nb_decode_close(AVCodecContext * avctx)
     return 0;
 }
 
-static av_cold int amr_nb_encode_close(AVCodecContext * avctx)
+static int amr_nb_decode_frame(AVCodecContext *avctx, void *data,
+                               int *data_size,
+                               const uint8_t *buf, int buf_size)
 {
     AMRContext *s = avctx->priv_data;
-
-    Encoder_Interface_exit(s->enstate);
-    av_freep(&avctx->coded_frame);
-    return 0;
-}
-
-static int amr_nb_decode_frame(AVCodecContext * avctx,
-            void *data, int *data_size,
-            const uint8_t * buf, int buf_size)
-{
-    AMRContext *s = avctx->priv_data;
-    const uint8_t*amrData=buf;
-    static const uint8_t block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
+    const uint8_t *amrData = buf;
+    static const uint8_t block_size[16] = { 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
     enum Mode dec_mode;
     int packet_size;
 
-    /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
+    /* av_log(NULL, AV_LOG_DEBUG, "amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",
+              buf, buf_size, s->frameCount); */
 
     dec_mode = (buf[0] >> 3) & 0x000F;
-    packet_size = block_size[dec_mode]+1;
+    packet_size = block_size[dec_mode] + 1;
 
-    if(packet_size > buf_size) {
-        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
+    if (packet_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n",
+               buf_size, packet_size);
         return -1;
     }
 
     s->frameCount++;
-    /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
+    /* av_log(NULL, AV_LOG_DEBUG, "packet_size=%d amrData= 0x%X %X %X %X\n",
+              packet_size, amrData[0], amrData[1], amrData[2], amrData[3]); */
     /* call decoder */
     Decoder_Interface_Decode(s->decState, amrData, data, 0);
-    *data_size=160*2;
+    *data_size = 160 * 2;
 
     return packet_size;
 }
 
-static int amr_nb_encode_frame(AVCodecContext *avctx,
-                            unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
-{
-    AMRContext *s = avctx->priv_data;
-    int written;
-
-    if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
-    {
-        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
-        return -1;
-    }
-
-    written = Encoder_Interface_Encode(s->enstate,
-        s->enc_bitrate,
-        data,
-        frame,
-        0);
-    /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
-
-    return written;
-}
-
-#endif
-
-#if CONFIG_LIBAMR_NB || CONFIG_LIBAMR_NB_FIXED
-
-AVCodec libamr_nb_decoder =
-{
+AVCodec libamr_nb_decoder = {
     "libamr_nb",
     CODEC_TYPE_AUDIO,
     CODEC_ID_AMR_NB,
@@ -507,8 +179,69 @@ AVCodec libamr_nb_decoder =
     .long_name = NULL_IF_CONFIG_SMALL("libamr-nb Adaptive Multi-Rate (AMR) Narrow-Band"),
 };
 
-AVCodec libamr_nb_encoder =
+static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
 {
+    AMRContext *s = avctx->priv_data;
+
+    s->frameCount = 0;
+
+    if (avctx->sample_rate != 8000) {
+        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
+        return -1;
+    }
+
+    if (avctx->channels != 1) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
+        return -1;
+    }
+
+    avctx->frame_size  = 160;
+    avctx->coded_frame = avcodec_alloc_frame();
+
+    s->enstate=Encoder_Interface_init(0);
+    if (!s->enstate) {
+        av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
+        return -1;
+    }
+
+    if ((s->enc_bitrate = getBitrateMode(avctx->bit_rate)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
+        return -1;
+    }
+
+    return 0;
+}
+
+static av_cold int amr_nb_encode_close(AVCodecContext *avctx)
+{
+    AMRContext *s = avctx->priv_data;
+
+    Encoder_Interface_exit(s->enstate);
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+static int amr_nb_encode_frame(AVCodecContext *avctx,
+                               unsigned char *frame/*out*/,
+                               int buf_size, void *data/*in*/)
+{
+    AMRContext *s = avctx->priv_data;
+    int written;
+
+    if ((s->enc_bitrate = getBitrateMode(avctx->bit_rate)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
+        return -1;
+    }
+
+    written = Encoder_Interface_Encode(s->enstate, s->enc_bitrate, data,
+                                       frame, 0);
+    /* av_log(NULL, AV_LOG_DEBUG, "amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",
+              written, s->enc_bitrate, frame[0] ); */
+
+    return written;
+}
+
+AVCodec libamr_nb_encoder = {
     "libamr_nb",
     CODEC_TYPE_AUDIO,
     CODEC_ID_AMR_NB,
@@ -531,85 +264,80 @@ AVCodec libamr_nb_encoder =
 #define typedef_h
 #endif
 
-#include <amrwb/enc_if.h>
 #include <amrwb/dec_if.h>
 #include <amrwb/if_rom.h>
 
-/* Common code for fixed and float version*/
-typedef struct AMRWB_bitrates
-{
+static const char wb_bitrate_unsupported[] =
+    "bitrate not supported: use one of 6.6k, 8.85k, 12.65k, 14.25k, 15.85k, 18.25k, 19.85k, 23.05k, or 23.85k\n";
+
+typedef struct AMRWB_bitrates {
     int rate;
     int mode;
 } AMRWB_bitrates;
 
+typedef struct AMRWBContext {
+    int    frameCount;
+    void  *state;
+    int    mode;
+    Word16 allow_dtx;
+} AMRWBContext;
+
+#if CONFIG_LIBAMR_WB_ENCODER
+
+#include <amrwb/enc_if.h>
+
 static int getWBBitrateMode(int bitrate)
 {
     /* make the correspondance between bitrate and mode */
-    AMRWB_bitrates rates[]={ {6600,0},
-                           {8850,1},
-                           {12650,2},
-                           {14250,3},
-                           {15850,4},
-                           {18250,5},
-                           {19850,6},
-                           {23050,7},
-                           {23850,8},
-                         };
+    AMRWB_bitrates rates[] = { { 6600, 0},
+                               { 8850, 1},
+                               {12650, 2},
+                               {14250, 3},
+                               {15850, 4},
+                               {18250, 5},
+                               {19850, 6},
+                               {23050, 7},
+                               {23850, 8}, };
     int i;
 
-    for(i=0;i<9;i++)
-    {
-        if(rates[i].rate==bitrate)
-        {
+    for (i = 0; i < 9; i++)
+        if (rates[i].rate == bitrate)
             return rates[i].mode;
-        }
-    }
     /* no bitrate matching, return an error */
     return -1;
 }
 
-
-typedef struct AMRWBContext {
-    int frameCount;
-    void *state;
-    int mode;
-    Word16 allow_dtx;
-} AMRWBContext;
-
-static int amr_wb_encode_init(AVCodecContext * avctx)
+static av_cold int amr_wb_encode_init(AVCodecContext *avctx)
 {
     AMRWBContext *s = avctx->priv_data;
 
-    s->frameCount=0;
+    s->frameCount = 0;
 
-    if(avctx->sample_rate!=16000)
-    {
+    if (avctx->sample_rate != 16000) {
         av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
         return -1;
     }
 
-    if(avctx->channels!=1)
-    {
+    if (avctx->channels != 1) {
         av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
         return -1;
     }
 
-    if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
-    {
+    if ((s->mode = getWBBitrateMode(avctx->bit_rate)) < 0) {
         av_log(avctx, AV_LOG_ERROR, wb_bitrate_unsupported);
         return -1;
     }
 
-    avctx->frame_size=320;
-    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->frame_size  = 320;
+    avctx->coded_frame = avcodec_alloc_frame();
 
-    s->state = E_IF_init();
-    s->allow_dtx=0;
+    s->state     = E_IF_init();
+    s->allow_dtx = 0;
 
     return 0;
 }
 
-static int amr_wb_encode_close(AVCodecContext * avctx)
+static int amr_wb_encode_close(AVCodecContext *avctx)
 {
     AMRWBContext *s = avctx->priv_data;
 
@@ -620,13 +348,13 @@ static int amr_wb_encode_close(AVCodecContext * avctx)
 }
 
 static int amr_wb_encode_frame(AVCodecContext *avctx,
-                            unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
+                               unsigned char *frame/*out*/,
+                               int buf_size, void *data/*in*/)
 {
     AMRWBContext *s = avctx->priv_data;
     int size;
 
-    if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
-    {
+    if ((s->mode = getWBBitrateMode(avctx->bit_rate)) < 0) {
         av_log(avctx, AV_LOG_ERROR, wb_bitrate_unsupported);
         return -1;
     }
@@ -634,76 +362,7 @@ static int amr_wb_encode_frame(AVCodecContext *avctx,
     return size;
 }
 
-static int amr_wb_decode_init(AVCodecContext * avctx)
-{
-    AMRWBContext *s = avctx->priv_data;
-
-    s->frameCount=0;
-    s->state = D_IF_init();
-
-    amr_decode_fix_avctx(avctx);
-
-    if(avctx->channels > 1)
-    {
-        av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
-        return -1;
-    }
-
-    return 0;
-}
-
-static int amr_wb_decode_frame(AVCodecContext * avctx,
-            void *data, int *data_size,
-            const uint8_t * buf, int buf_size)
-{
-    AMRWBContext *s = avctx->priv_data;
-    const uint8_t*amrData=buf;
-    int mode;
-    int packet_size;
-    static const uint8_t block_size[16] = {18, 23, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1};
-
-    if(buf_size==0) {
-        /* nothing to do */
-        return 0;
-    }
-
-    mode = (amrData[0] >> 3) & 0x000F;
-    packet_size = block_size[mode];
-
-    if(packet_size > buf_size) {
-        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
-        return -1;
-    }
-
-    s->frameCount++;
-    D_IF_decode( s->state, amrData, data, _good_frame);
-    *data_size=320*2;
-    return packet_size;
-}
-
-static int amr_wb_decode_close(AVCodecContext * avctx)
-{
-    AMRWBContext *s = avctx->priv_data;
-
-    D_IF_exit(s->state);
-    return 0;
-}
-
-AVCodec libamr_wb_decoder =
-{
-    "libamr_wb",
-    CODEC_TYPE_AUDIO,
-    CODEC_ID_AMR_WB,
-    sizeof(AMRWBContext),
-    amr_wb_decode_init,
-    NULL,
-    amr_wb_decode_close,
-    amr_wb_decode_frame,
-    .long_name = NULL_IF_CONFIG_SMALL("libamr-wb Adaptive Multi-Rate (AMR) Wide-Band"),
-};
-
-AVCodec libamr_wb_encoder =
-{
+AVCodec libamr_wb_encoder = {
     "libamr_wb",
     CODEC_TYPE_AUDIO,
     CODEC_ID_AMR_WB,
@@ -716,4 +375,72 @@ AVCodec libamr_wb_encoder =
     .long_name = NULL_IF_CONFIG_SMALL("libamr-wb Adaptive Multi-Rate (AMR) Wide-Band"),
 };
 
+#endif
+
+static av_cold int amr_wb_decode_init(AVCodecContext *avctx)
+{
+    AMRWBContext *s = avctx->priv_data;
+
+    s->frameCount = 0;
+    s->state      = D_IF_init();
+
+    amr_decode_fix_avctx(avctx);
+
+    if (avctx->channels > 1) {
+        av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int amr_wb_decode_frame(AVCodecContext *avctx,
+                               void *data, int *data_size,
+                               const uint8_t *buf, int buf_size)
+{
+    AMRWBContext *s = avctx->priv_data;
+    const uint8_t *amrData = buf;
+    int mode;
+    int packet_size;
+    static const uint8_t block_size[16] = {18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1};
+
+    if (!buf_size)
+        /* nothing to do */
+        return 0;
+
+    mode = (amrData[0] >> 3) & 0x000F;
+    packet_size = block_size[mode];
+
+    if (packet_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n",
+               buf_size, packet_size + 1);
+        return -1;
+    }
+
+    s->frameCount++;
+    D_IF_decode(s->state, amrData, data, _good_frame);
+    *data_size = 320 * 2;
+    return packet_size;
+}
+
+static int amr_wb_decode_close(AVCodecContext *avctx)
+{
+    AMRWBContext *s = avctx->priv_data;
+
+    D_IF_exit(s->state);
+    return 0;
+}
+
+AVCodec libamr_wb_decoder = {
+    "libamr_wb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_WB,
+    sizeof(AMRWBContext),
+    amr_wb_decode_init,
+    NULL,
+    amr_wb_decode_close,
+    amr_wb_decode_frame,
+    .long_name = NULL_IF_CONFIG_SMALL("libamr-wb Adaptive Multi-Rate (AMR) Wide-Band"),
+};
+
 #endif //CONFIG_LIBAMR_WB

From 9ad437eafbb34cedbc9c07f24f9143d9d53e839f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 7 Jun 2009 20:14:56 +0000
Subject: [PATCH 028/315] Merge OpenCORE AMR support from trunk.

Originally committed as revision 19133 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog                    |   1 +
 LICENSE                      |   5 +
 configure                    |  14 ++
 doc/general.texi             |  22 ++-
 libavcodec/Makefile          |   2 +
 libavcodec/allcodecs.c       |   2 +
 libavcodec/libopencore-amr.c | 325 +++++++++++++++++++++++++++++++++++
 7 files changed, 368 insertions(+), 3 deletions(-)
 create mode 100644 libavcodec/libopencore-amr.c

diff --git a/Changelog b/Changelog
index 8dc20b45d1..2462e6e33b 100644
--- a/Changelog
+++ b/Changelog
@@ -9,6 +9,7 @@ version 0.5.1:
 - fix for GPL code in libswscale that was erroneously activated
 - AltiVec code in libswscale is now LGPL
 - remaining GPL parts in AC-3 decoder converted to LGPL
+- AMR-NB decoding/encoding, AMR-WB decoding via OpenCORE libraries
 
 
diff --git a/LICENSE b/LICENSE
index 5817658978..8dc2c8953d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -39,6 +39,11 @@ Some external libraries, e.g. libx264, are under GPL and can be used in
 conjunction with FFmpeg. They require --enable-gpl to be passed to configure
 as well.
 
+The OpenCORE external libraries are under the Apache License 2.0. That license
+is incompatible with the LGPL v2.1 and the GPL v2, but not with version 3 of
+those licenses. So to combine the OpenCORE libraries with FFmpeg, the license
+version needs to be upgraded by passing --enable-version3 to configure.
+
 The nonfree external libraries libamrnb, libamrwb and libfaac can be hooked up
 in FFmpeg. You need to pass --enable-nonfree to configure to enable them. Employ
 this option with care as FFmpeg then becomes nonfree and unredistributable.
diff --git a/configure b/configure
index 2a576e0c06..9181e792a7 100755
--- a/configure
+++ b/configure
@@ -149,6 +149,8 @@ show_help(){
   echo "  --enable-bzlib           enable bzlib [autodetect]"
   echo "  --enable-libamr-nb       enable libamr-nb floating point audio codec [no]"
   echo "  --enable-libamr-wb       enable libamr-wb floating point audio codec [no]"
+  echo "  --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no]"
+  echo "  --enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no]"
   echo "  --enable-libdc1394       enable IIDC-1394 grabbing using libdc1394"
   echo "                           and libraw1394 [no]"
   echo "  --enable-libdirac        enable Dirac support via libdirac [no]"
@@ -780,6 +782,8 @@ CONFIG_LIST="
     libgsm
     libmp3lame
     libnut
+    libopencore_amrnb
+    libopencore_amrwb
     libopenjpeg
     libschroedinger
     libspeex
@@ -1079,6 +1083,9 @@ libgsm_encoder_deps="libgsm"
 libgsm_ms_decoder_deps="libgsm"
 libgsm_ms_encoder_deps="libgsm"
 libmp3lame_encoder_deps="libmp3lame"
+libopencore_amrnb_decoder_deps="libopencore_amrnb"
+libopencore_amrnb_encoder_deps="libopencore_amrnb"
+libopencore_amrwb_decoder_deps="libopencore_amrwb"
 libopenjpeg_decoder_deps="libopenjpeg"
 libschroedinger_decoder_deps="libschroedinger"
 libschroedinger_encoder_deps="libschroedinger"
@@ -1699,6 +1706,9 @@ die_license_disabled nonfree libamr_nb
 die_license_disabled nonfree libamr_wb
 die_license_disabled nonfree libfaac
 
+die_license_disabled version3 libopencore_amrnb
+die_license_disabled version3 libopencore_amrwb
+
 enabled version3 && { enabled gpl && enable gplv3 || enable lgplv3; }
 
 check_deps $ARCH_EXT_LIST
@@ -1990,6 +2000,8 @@ enabled libfaad    && require2 libfaad faad.h faacDecOpen -lfaad
 enabled libgsm     && require  libgsm gsm.h gsm_create -lgsm
 enabled libmp3lame && require  libmp3lame lame/lame.h lame_init -lmp3lame -lm
 enabled libnut     && require  libnut libnut.h nut_demuxer_init -lnut
+enabled libopencore_amrnb  && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb -lm
+enabled libopencore_amrwb  && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb -lm
 enabled libopenjpeg && require libopenjpeg openjpeg.h opj_version -lopenjpeg
 enabled libschroedinger && add_cflags $(pkg-config --cflags schroedinger-1.0) &&
                            require libschroedinger schroedinger/schro.h schro_init $(pkg-config --libs schroedinger-1.0)
@@ -2317,6 +2329,8 @@ echo "libfaad dlopened          ${libfaadbin-no}"
 echo "libgsm enabled            ${libgsm-no}"
 echo "libmp3lame enabled        ${libmp3lame-no}"
 echo "libnut enabled            ${libnut-no}"
+echo "libopencore-amrnb support ${libopencore_amrnb-no}"
+echo "libopencore-amrwb support ${libopencore_amrwb-no}"
 echo "libopenjpeg enabled       ${libopenjpeg-no}"
 echo "libschroedinger enabled   ${libschroedinger-no}"
 echo "libspeex enabled          ${libspeex-no}"
diff --git a/doc/general.texi b/doc/general.texi
index 97a62bbe49..fc7b06d3c2 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -18,7 +18,22 @@ explicitly requested by passing the appropriate flags to @file{./configure}.
 
 AMR comes in two different flavors, wideband and narrowband. FFmpeg can make
 use of the AMR wideband (floating-point mode) and the AMR narrowband
-(floating-point mode) reference decoders and encoders.
+(floating-point mode) reference decoders and encoders (libamr) as well as
+the OpenCORE libraries for AMR-NB decoding/encoding and AMR-WB decoding.
+
+@subsection OpenCORE
+
+Go to @url{http://gitorious.org/opencore-amr/} and follow the instructions for
+installing the libraries. Then pass @code{--enable-libopencore-amrnb} and/or
+@code{--enable-libopencore-amrwb} to configure to enable the libraries.
+
+Note that OpenCORE is under the Apache License 2.0 (see
+@url{http://www.apache.org/licenses/LICENSE-2.0} for details), which is
+incompatible with the LGPL version 2.1 and GPL version 2. You have to
+upgrade FFmpeg's license to LGPL version 3 (or, if you have enabled GPL
+components, GPL version 3) by passing @code{--enable-version3} to configure.
+
+@subsection libamr
 
 Go to @url{http://www.penguin.cz/~utx/amr} and follow the instructions for
 installing the libraries. Then pass @code{--enable-libamr-nb} and/or
@@ -511,9 +526,10 @@ following image formats are supported:
     @tab Used in Westwood Studios games like Command and Conquer.
 @item ADPCM Yamaha           @tab  X  @tab  X
 @item AMR-NB                 @tab  E  @tab  E
-    @tab supported through external library libamrnb
+    @tab supported through external libraries libamrnb and libopencore-amrnb
 @item AMR-WB                 @tab  E  @tab  E
-    @tab supported through external library libamrwb
+    @tab decoding supported through external libraries libamrwb and libopencore-amrwb,
+         encoding supported through external library libamrwb
 @item Apple lossless audio   @tab  X  @tab  X
     @tab QuickTime fourcc 'alac'
 @item Atrac 3                @tab     @tab  X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 5067354da1..cf742d17f4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -366,6 +366,8 @@ OBJS-$(CONFIG_LIBFAAC)                 += libfaac.o
 OBJS-$(CONFIG_LIBFAAD)                 += libfaad.o
 OBJS-$(CONFIG_LIBGSM)                  += libgsm.o
 OBJS-$(CONFIG_LIBMP3LAME)              += libmp3lame.o
+OBJS-$(CONFIG_LIBOPENCORE_AMRNB)       += libopencore-amr.o
+OBJS-$(CONFIG_LIBOPENCORE_AMRWB)       += libopencore-amr.o
 OBJS-$(CONFIG_LIBOPENJPEG)             += libopenjpeg.o
 OBJS-$(CONFIG_LIBSCHROEDINGER_DECODER) += libschroedingerdec.o libschroedinger.o libdirac_libschro.o
 OBJS-$(CONFIG_LIBSCHROEDINGER_ENCODER) += libschroedingerenc.o libschroedinger.o libdirac_libschro.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0cb0e6d239..4a6cbc05ed 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -294,6 +294,8 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC  (LIBGSM, libgsm);
     REGISTER_ENCDEC  (LIBGSM_MS, libgsm_ms);
     REGISTER_ENCODER (LIBMP3LAME, libmp3lame);
+    REGISTER_ENCDEC  (LIBOPENCORE_AMRNB, libopencore_amrnb);
+    REGISTER_DECODER (LIBOPENCORE_AMRWB, libopencore_amrwb);
     REGISTER_DECODER (LIBOPENJPEG, libopenjpeg);
     REGISTER_ENCDEC  (LIBSCHROEDINGER, libschroedinger);
     REGISTER_DECODER (LIBSPEEX, libspeex);
diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
new file mode 100644
index 0000000000..1544db7bed
--- /dev/null
+++ b/libavcodec/libopencore-amr.c
@@ -0,0 +1,325 @@
+/*
+ * AMR Audio decoder stub
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+
+static void amr_decode_fix_avctx(AVCodecContext *avctx)
+{
+    const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
+
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 8000 * is_amr_wb;
+
+    if (!avctx->channels)
+        avctx->channels = 1;
+
+    avctx->frame_size = 160 * is_amr_wb;
+    avctx->sample_fmt = SAMPLE_FMT_S16;
+}
+
+#if CONFIG_LIBOPENCORE_AMRNB
+
+#include <opencore-amrnb/interf_dec.h>
+#include <opencore-amrnb/interf_enc.h>
+
+static const char nb_bitrate_unsupported[] =
+    "bitrate not supported: use one of 4.75k, 5.15k, 5.9k, 6.7k, 7.4k, 7.95k, 10.2k or 12.2k\n";
+
+/* Pairs a supported bitrate with its AMR-NB mode */
+typedef struct AMR_bitrates {
+    int       rate;
+    enum Mode mode;
+} AMR_bitrates;
+
+/* Match desired bitrate */
+static int getBitrateMode(int bitrate)
+{
+    /* map each supported bitrate to its corresponding mode */
+    AMR_bitrates rates[] = { { 4750, MR475},
+                             { 5150, MR515},
+                             { 5900, MR59},
+                             { 6700, MR67},
+                             { 7400, MR74},
+                             { 7950, MR795},
+                             {10200, MR102},
+                             {12200, MR122}, };
+    int i;
+
+    for (i = 0; i < 8; i++)
+        if (rates[i].rate == bitrate)
+            return rates[i].mode;
+    /* no bitrate matching, return an error */
+    return -1;
+}
+
+typedef struct AMRContext {
+    int   frameCount;
+    void *decState;
+    int  *enstate;
+    int   enc_bitrate;
+} AMRContext;
+
+static av_cold int amr_nb_decode_init(AVCodecContext *avctx)
+{
+    AMRContext *s = avctx->priv_data;
+
+    s->frameCount = 0;
+    s->decState   = Decoder_Interface_init();
+    if (!s->decState) {
+        av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
+        return -1;
+    }
+
+    amr_decode_fix_avctx(avctx);
+
+    if (avctx->channels > 1) {
+        av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static av_cold int amr_nb_decode_close(AVCodecContext *avctx)
+{
+    AMRContext *s = avctx->priv_data;
+
+    Decoder_Interface_exit(s->decState);
+    return 0;
+}
+
+static int amr_nb_decode_frame(AVCodecContext *avctx, void *data,
+                               int *data_size, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    AMRContext *s = avctx->priv_data;
+    const uint8_t *amrData = buf;
+    static const uint8_t block_size[16] = { 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
+    enum Mode dec_mode;
+    int packet_size;
+
+    /* av_log(NULL, AV_LOG_DEBUG, "amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",
+              buf, buf_size, s->frameCount); */
+
+    dec_mode = (buf[0] >> 3) & 0x000F;
+    packet_size = block_size[dec_mode] + 1;
+
+    if (packet_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n",
+               buf_size, packet_size);
+        return -1;
+    }
+
+    s->frameCount++;
+    /* av_log(NULL, AV_LOG_DEBUG, "packet_size=%d amrData= 0x%X %X %X %X\n",
+              packet_size, amrData[0], amrData[1], amrData[2], amrData[3]); */
+    /* call decoder */
+    Decoder_Interface_Decode(s->decState, amrData, data, 0);
+    *data_size = 160 * 2;
+
+    return packet_size;
+}
+
+AVCodec libopencore_amrnb_decoder = {
+    "libopencore_amrnb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_NB,
+    sizeof(AMRContext),
+    amr_nb_decode_init,
+    NULL,
+    amr_nb_decode_close,
+    amr_nb_decode_frame,
+    .long_name = NULL_IF_CONFIG_SMALL("OpenCORE Adaptive Multi-Rate (AMR) Narrow-Band"),
+};
+
+static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
+{
+    AMRContext *s = avctx->priv_data;
+
+    s->frameCount = 0;
+
+    if (avctx->sample_rate != 8000) {
+        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
+        return -1;
+    }
+
+    if (avctx->channels != 1) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
+        return -1;
+    }
+
+    avctx->frame_size  = 160;
+    avctx->coded_frame = avcodec_alloc_frame();
+
+    s->enstate=Encoder_Interface_init(0);
+    if (!s->enstate) {
+        av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
+        return -1;
+    }
+
+    if ((s->enc_bitrate = getBitrateMode(avctx->bit_rate)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
+        return -1;
+    }
+
+    return 0;
+}
+
+static av_cold int amr_nb_encode_close(AVCodecContext *avctx)
+{
+    AMRContext *s = avctx->priv_data;
+
+    Encoder_Interface_exit(s->enstate);
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+static int amr_nb_encode_frame(AVCodecContext *avctx,
+                               unsigned char *frame/*out*/,
+                               int buf_size, void *data/*in*/)
+{
+    AMRContext *s = avctx->priv_data;
+    int written;
+
+    if ((s->enc_bitrate = getBitrateMode(avctx->bit_rate)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
+        return -1;
+    }
+
+    written = Encoder_Interface_Encode(s->enstate, s->enc_bitrate, data,
+                                       frame, 0);
+    /* av_log(NULL, AV_LOG_DEBUG, "amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",
+              written, s->enc_bitrate, frame[0] ); */
+
+    return written;
+}
+
+AVCodec libopencore_amrnb_encoder = {
+    "libopencore_amrnb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_NB,
+    sizeof(AMRContext),
+    amr_nb_encode_init,
+    amr_nb_encode_frame,
+    amr_nb_encode_close,
+    NULL,
+    .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE},
+    .long_name = NULL_IF_CONFIG_SMALL("OpenCORE Adaptive Multi-Rate (AMR) Narrow-Band"),
+};
+
+#endif
+
+/* -----------AMR wideband ------------*/
+#if CONFIG_LIBOPENCORE_AMRWB
+
+#ifdef _TYPEDEF_H
+// Avoid duplicate typedefs when the AMR-NB typedef header was already included
+#define typedef_h
+#endif
+
+#include <opencore-amrwb/dec_if.h>
+#include <opencore-amrwb/if_rom.h>
+
+static const char wb_bitrate_unsupported[] =
+    "bitrate not supported: use one of 6.6k, 8.85k, 12.65k, 14.25k, 15.85k, 18.25k, 19.85k, 23.05k, or 23.85k\n";
+
+/* Pairs a bitrate with its AMR-WB mode index */
+typedef struct AMRWB_bitrates {
+    int rate;
+    int mode;
+} AMRWB_bitrates;
+
+typedef struct AMRWBContext {
+    int    frameCount;
+    void  *state;
+    int    mode;
+    Word16 allow_dtx;
+} AMRWBContext;
+
+static av_cold int amr_wb_decode_init(AVCodecContext *avctx)
+{
+    AMRWBContext *s = avctx->priv_data;
+
+    s->frameCount = 0;
+    s->state      = D_IF_init();
+
+    amr_decode_fix_avctx(avctx);
+
+    if (avctx->channels > 1) {
+        av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int amr_wb_decode_frame(AVCodecContext *avctx, void *data,
+                               int *data_size, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    AMRWBContext *s = avctx->priv_data;
+    const uint8_t *amrData = buf;
+    int mode;
+    int packet_size;
+    static const uint8_t block_size[16] = {18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1};
+
+    if (!buf_size)
+        /* nothing to do */
+        return 0;
+
+    mode = (amrData[0] >> 3) & 0x000F;
+    packet_size = block_size[mode];
+
+    if (packet_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n",
+               buf_size, packet_size + 1);
+        return -1;
+    }
+
+    s->frameCount++;
+    D_IF_decode(s->state, amrData, data, _good_frame);
+    *data_size = 320 * 2;
+    return packet_size;
+}
+
+static int amr_wb_decode_close(AVCodecContext *avctx)
+{
+    AMRWBContext *s = avctx->priv_data;
+
+    D_IF_exit(s->state);
+    return 0;
+}
+
+AVCodec libopencore_amrwb_decoder = {
+    "libopencore_amrwb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_WB,
+    sizeof(AMRWBContext),
+    amr_wb_decode_init,
+    NULL,
+    amr_wb_decode_close,
+    amr_wb_decode_frame,
+    .long_name = NULL_IF_CONFIG_SMALL("OpenCORE Adaptive Multi-Rate (AMR) Wide-Band"),
+};
+
+#endif /* CONFIG_LIBOPENCORE_AMRWB */

From 44b20d1d7419ae26a4166ae123ce1dc09e6bf796 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 7 Jun 2009 22:41:11 +0000
Subject: [PATCH 029/315] Fix OpenCORE build: Do not use new AVPacket
 infrastructure from trunk.

Originally committed as revision 19134 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/libopencore-amr.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
index 1544db7bed..6ff8c3eada 100644
--- a/libavcodec/libopencore-amr.c
+++ b/libavcodec/libopencore-amr.c
@@ -107,10 +107,9 @@ static av_cold int amr_nb_decode_close(AVCodecContext *avctx)
 }
 
 static int amr_nb_decode_frame(AVCodecContext *avctx, void *data,
-                               int *data_size, AVPacket *avpkt)
+                               int *data_size,
+                               const uint8_t *buf, int buf_size)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
     AMRContext *s = avctx->priv_data;
     const uint8_t *amrData = buf;
     static const uint8_t block_size[16] = { 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
@@ -272,11 +271,10 @@ static av_cold int amr_wb_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int amr_wb_decode_frame(AVCodecContext *avctx, void *data,
-                               int *data_size, AVPacket *avpkt)
+static int amr_wb_decode_frame(AVCodecContext *avctx,
+                               void *data, int *data_size,
+                               const uint8_t *buf, int buf_size)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
     AMRWBContext *s = avctx->priv_data;
     const uint8_t *amrData = buf;
     int mode;

From 5d621410922d42ca3c641102b766d3ddc297dc98 Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Wed, 1 Jul 2009 20:14:19 +0000
Subject: [PATCH 030/315] Update ffmpeg documentation regarding metadata
 setting. -title, -author, -copyright, -track, -album, and -year options have
 been dropped in favor of -metadata.

Backfix of r19285, r19287, and r19320.


Originally committed as revision 19321 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 doc/ffmpeg-doc.texi | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/doc/ffmpeg-doc.texi b/doc/ffmpeg-doc.texi
index e3f2dc8727..e306894fe4 100644
--- a/doc/ffmpeg-doc.texi
+++ b/doc/ffmpeg-doc.texi
@@ -276,29 +276,16 @@ The offset is added to the timestamps of the input files.
 Specifying a positive offset means that the corresponding
 streams are delayed by 'offset' seconds.
 
-@item -title @var{string}
-Set the title.
-
 @item -timestamp @var{time}
 Set the timestamp.
 
-@item -author @var{string}
-Set the author.
+@item -metadata @var{key}=@var{value}
+Set a metadata key/value pair.
 
-@item -copyright @var{string}
-Set the copyright.
-
-@item -comment @var{string}
-Set the comment.
-
-@item -album @var{string}
-Set the album.
-
-@item -track @var{number}
-Set the track.
-
-@item -year @var{number}
-Set the year.
+For example, for setting the title in the output file:
+@example
+ffmpeg -i in.avi -metadata title="my title" out.flv
+@end example
 
 @item -v @var{number}
 Set the logging verbosity level.

From 8819b9c600b0e18367ad577e634a6e0bb8879fea Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Fri, 3 Jul 2009 11:14:37 +0000
Subject: [PATCH 031/315] Revert r19321. The changes were not approved.

Originally committed as revision 19329 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 doc/ffmpeg-doc.texi | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/doc/ffmpeg-doc.texi b/doc/ffmpeg-doc.texi
index e306894fe4..e3f2dc8727 100644
--- a/doc/ffmpeg-doc.texi
+++ b/doc/ffmpeg-doc.texi
@@ -276,16 +276,29 @@ The offset is added to the timestamps of the input files.
 Specifying a positive offset means that the corresponding
 streams are delayed by 'offset' seconds.
 
+@item -title @var{string}
+Set the title.
+
 @item -timestamp @var{time}
 Set the timestamp.
 
-@item -metadata @var{key}=@var{value}
-Set a metadata key/value pair.
+@item -author @var{string}
+Set the author.
 
-For example, for setting the title in the output file:
-@example
-ffmpeg -i in.avi -metadata title="my title" out.flv
-@end example
+@item -copyright @var{string}
+Set the copyright.
+
+@item -comment @var{string}
+Set the comment.
+
+@item -album @var{string}
+Set the album.
+
+@item -track @var{number}
+Set the track.
+
+@item -year @var{number}
+Set the year.
 
 @item -v @var{number}
 Set the logging verbosity level.

From 93229681b53ce6c857f41f2e585d03512320f7aa Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 5 Jul 2009 20:35:02 +0000
Subject: [PATCH 032/315] Merge remaining changes to make libswscale usable in
 LGPL mode from trunk.

Originally committed as revision 19352 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure            |  1 -
 libswscale/swscale.c | 13 ++++++-------
 libswscale/yuv2rgb.c |  2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/configure b/configure
index 9181e792a7..fd11501e22 100755
--- a/configure
+++ b/configure
@@ -1699,7 +1699,6 @@ die_license_disabled gpl libfaad2
 die_license_disabled gpl libx264
 die_license_disabled gpl libxvid
 die_license_disabled gpl postproc
-die_license_disabled gpl swscale
 die_license_disabled gpl x11grab
 
 die_license_disabled nonfree libamr_nb
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 7c335f1680..4338acee03 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -955,13 +955,12 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 
 //Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
 //Plain C versions
-#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
+#if ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
 #define COMPILE_C
 #endif
 
 #if ARCH_PPC
-#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
-#undef COMPILE_C
+#if HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)
 #define COMPILE_ALTIVEC
 #endif
 #endif //ARCH_PPC
@@ -1637,8 +1636,8 @@ static void globalInit(void){
 
 static SwsFunc getSwsFunc(int flags){
 
-#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
-#if ARCH_X86
+#if defined(RUNTIME_CPUDETECT)
+#if ARCH_X86 && CONFIG_GPL
     // ordered per speed fastest first
     if (flags & SWS_CPU_CAPS_MMX2)
         return swScale_MMX2;
@@ -1657,7 +1656,7 @@ static SwsFunc getSwsFunc(int flags){
         return swScale_C;
 #endif
     return swScale_C;
-#endif /* ARCH_X86 */
+#endif /* ARCH_X86 && CONFIG_GPL */
 #else //RUNTIME_CPUDETECT
 #if   HAVE_MMX2
     return swScale_MMX2;
@@ -2194,7 +2193,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         __asm__ volatile("emms\n\t"::: "memory");
 #endif
 
-#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
+#if !defined(RUNTIME_CPUDETECT) //ensure that the flags match the compiled variant if cpudetect is off
     flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
 #if   HAVE_MMX2
     flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 65af412c2c..c95d07c3e5 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -453,7 +453,7 @@ SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c)
 #if CONFIG_MLIB
     t = sws_yuv2rgb_init_mlib(c);
 #endif
-#if HAVE_ALTIVEC && CONFIG_GPL
+#if HAVE_ALTIVEC
     if (c->flags & SWS_CPU_CAPS_ALTIVEC)
         t = sws_yuv2rgb_init_altivec(c);
 #endif

From 7a5e131735f1139a622fe99ab486bf46d6e53c26 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 15 Dec 2009 22:34:22 +0000
Subject: [PATCH 033/315] The license upgrade code was ported from trunk.

Originally committed as revision 20876 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index 2462e6e33b..d7fbaafd2f 100644
--- a/Changelog
+++ b/Changelog
@@ -9,6 +9,7 @@ version 0.5.1:
 - fix for GPL code in libswscale that was erroneously activated
 - AltiVec code in libswscale is now LGPL
 - remaining GPL parts in AC-3 decoder converted to LGPL
+- (L)GPL license upgrade support
 - AMR-NB decoding/encoding, AMR-WB decoding via OpenCORE libraries
 
 
From a9785f58c670bf5bbd2d8700437f4f36c641fa88 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 1 Feb 2010 16:00:09 +0000
Subject: [PATCH 034/315] backport symbol versioning patch

Originally committed as revision 21595 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 common.mak                |  6 +++++-
 configure                 | 16 +++++++++++++---
 libavcodec/libavcodec.v   |  3 +++
 libavdevice/libavdevice.v |  4 ++++
 libavfilter/libavfilter.v |  4 ++++
 libavformat/libavformat.v |  3 +++
 libavutil/libavutil.v     |  4 ++++
 libpostproc/libpostproc.v |  4 ++++
 libswscale/libswscale.v   |  3 +++
 subdir.mak                |  2 +-
 10 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 libavcodec/libavcodec.v
 create mode 100644 libavdevice/libavdevice.v
 create mode 100644 libavfilter/libavfilter.v
 create mode 100644 libavformat/libavformat.v
 create mode 100644 libavutil/libavutil.v
 create mode 100644 libpostproc/libpostproc.v
 create mode 100644 libswscale/libswscale.v

diff --git a/common.mak b/common.mak
index 4be056c876..a98ccd75e2 100644
--- a/common.mak
+++ b/common.mak
@@ -9,6 +9,7 @@ vpath %.c $(SRC_DIR)
 vpath %.h $(SRC_DIR)
 vpath %.S $(SRC_DIR)
 vpath %.asm $(SRC_DIR)
+vpath %.v   $(SRC_DIR)
 
 ifeq ($(SRC_DIR),$(SRC_PATH_BARE))
 BUILD_ROOT_REL = .
@@ -43,6 +44,9 @@ CFLAGS := -DHAVE_AV_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE \
 
 %$(EXESUF): %.c
 
+%.ver: %.v
+	sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@
+
 SVN_ENTRIES = $(SRC_PATH_BARE)/.svn/entries
 ifeq ($(wildcard $(SVN_ENTRIES)),$(SVN_ENTRIES))
 $(BUILD_ROOT_REL)/version.h: $(SVN_ENTRIES)
@@ -77,7 +81,7 @@ checkheaders: $(filter-out %_template.ho,$(ALLHEADERS:.h=.ho))
 DEPS := $(OBJS:.o=.d)
 depend dep: $(DEPS)
 
-CLEANSUFFIXES = *.o *~ *.ho
+CLEANSUFFIXES = *.o *~ *.ho *.ver
 LIBSUFFIXES   = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a *.exp *.map
 DISTCLEANSUFFIXES = *.d *.pc
 
diff --git a/configure b/configure
index fd11501e22..229b84d21f 100755
--- a/configure
+++ b/configure
@@ -566,13 +566,18 @@ int x;
 EOF
 }
 
-check_ldflags(){
-    log check_ldflags "$@"
-    check_ld "$@" <<EOF && add_ldflags "$@"
+test_ldflags(){
+    log test_ldflags "$@"
+    check_ld "$@" <<EOF
 int main(void){ return 0; }
 EOF
 }
 
+check_ldflags(){
+    log check_ldflags "$@"
+    test_ldflags "$@" && add_ldflags "$@"
+}
+
 check_header(){
     log check_header "$@"
     header=$1
@@ -1356,6 +1361,7 @@ TMPE="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}${EXESUF}"
 TMPH="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.h"
 TMPO="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.o"
 TMPS="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.S"
+TMPV="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.ver"
 TMPSH="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.sh"
 
 # make sure we can execute files in $TMPDIR
@@ -2160,6 +2166,10 @@ check_ldflags -Wl,--as-needed
 check_ldflags '-Wl,-rpath-link,\$(BUILD_ROOT)/libpostproc -Wl,-rpath-link,\$(BUILD_ROOT)/libswscale -Wl,-rpath-link,\$(BUILD_ROOT)/libavfilter -Wl,-rpath-link,\$(BUILD_ROOT)/libavdevice -Wl,-rpath-link,\$(BUILD_ROOT)/libavformat -Wl,-rpath-link,\$(BUILD_ROOT)/libavcodec -Wl,-rpath-link,\$(BUILD_ROOT)/libavutil'
 check_ldflags -Wl,-Bsymbolic
 
+echo "X{};" > $TMPV
+test_ldflags -Wl,--version-script,$TMPV &&
+    append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver'
+
 if enabled small; then
     check_cflags -Os            # not all compilers support -Os
     optimizations="small"
diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v
new file mode 100644
index 0000000000..561a42cd4f
--- /dev/null
+++ b/libavcodec/libavcodec.v
@@ -0,0 +1,3 @@
+LIBAVCODEC_$MAJOR {
+        global: *;
+};
diff --git a/libavdevice/libavdevice.v b/libavdevice/libavdevice.v
new file mode 100644
index 0000000000..663af85ba8
--- /dev/null
+++ b/libavdevice/libavdevice.v
@@ -0,0 +1,4 @@
+LIBAVDEVICE_$MAJOR {
+        global: avdevice_*;
+        local: *;
+};
diff --git a/libavfilter/libavfilter.v b/libavfilter/libavfilter.v
new file mode 100644
index 0000000000..83e8887080
--- /dev/null
+++ b/libavfilter/libavfilter.v
@@ -0,0 +1,4 @@
+LIBAVFILTER_$MAJOR {
+        global: avfilter_*; av_*;
+        local: *;
+};
diff --git a/libavformat/libavformat.v b/libavformat/libavformat.v
new file mode 100644
index 0000000000..da2311eb36
--- /dev/null
+++ b/libavformat/libavformat.v
@@ -0,0 +1,3 @@
+LIBAVFORMAT_$MAJOR {
+        global: *;
+};
diff --git a/libavutil/libavutil.v b/libavutil/libavutil.v
new file mode 100644
index 0000000000..ec52f2be7a
--- /dev/null
+++ b/libavutil/libavutil.v
@@ -0,0 +1,4 @@
+LIBAVUTIL_$MAJOR {
+        global: av_*; ff_*; avutil_*;
+        local: *;
+};
diff --git a/libpostproc/libpostproc.v b/libpostproc/libpostproc.v
new file mode 100644
index 0000000000..e65d76f4f6
--- /dev/null
+++ b/libpostproc/libpostproc.v
@@ -0,0 +1,4 @@
+LIBPOSTPROC_$MAJOR {
+        global: postproc_*; pp_*;
+        local: *;
+};
diff --git a/libswscale/libswscale.v b/libswscale/libswscale.v
new file mode 100644
index 0000000000..d6bd39a8c5
--- /dev/null
+++ b/libswscale/libswscale.v
@@ -0,0 +1,3 @@
+LIBSWSCALE_%MAJOR% {
+        global: *;
+};
diff --git a/subdir.mak b/subdir.mak
index df5bb8a09a..4749d4efd6 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -27,7 +27,7 @@ install-libs: install-lib$(NAME)-shared
 $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 	cd ./$(SUBDIR) && $(LN_S) $(SLIBNAME_WITH_MAJOR) $(SLIBNAME)
 
-$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS)
+$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SUBDIR)lib$(NAME).ver
 	$(SLIB_CREATE_DEF_CMD)
 	$(CC) $(SHFLAGS) $(FFLDFLAGS) -o $$@ $$(filter-out $(DEP_LIBS),$$^) $(FFEXTRALIBS) $(EXTRAOBJS)
 	$(SLIB_EXTRA_CMD)

From f5c694972e9b245870b9a3b741ae42fce311f568 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 1 Feb 2010 16:02:46 +0000
Subject: [PATCH 035/315] mention symbol versioning

Originally committed as revision 21596 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index d7fbaafd2f..3d13d8de80 100644
--- a/Changelog
+++ b/Changelog
@@ -11,6 +11,7 @@ version 0.5.1:
 - remaining GPL parts in AC-3 decoder converted to LGPL
 - (L)GPL license upgrade support
 - AMR-NB decoding/encoding, AMR-WB decoding via OpenCORE libraries
+- enable symbol versioning by default for linkers that support it
 
 
From 15dfbc503dec612ab79aa891284f875bdd0ecd6a Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 2 Feb 2010 19:57:40 +0000
Subject: [PATCH 036/315] missing hunk to unbreak linking with symbol
 versioning

This hunk does not apply in trunk/ because the dependencies for
this rule were reworked there.


Originally committed as revision 21608 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 subdir.mak | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subdir.mak b/subdir.mak
index 4749d4efd6..a5f83c5b88 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -29,7 +29,7 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SUBDIR)lib$(NAME).ver
 	$(SLIB_CREATE_DEF_CMD)
-	$(CC) $(SHFLAGS) $(FFLDFLAGS) -o $$@ $$(filter-out $(DEP_LIBS),$$^) $(FFEXTRALIBS) $(EXTRAOBJS)
+	$(CC) $(SHFLAGS) $(FFLDFLAGS) -o $$@ $$(filter-out $(SUBDIR)lib$(NAME).ver $(DEP_LIBS),$$^) $(FFEXTRALIBS) $(EXTRAOBJS)
 	$(SLIB_EXTRA_CMD)
 
 ifdef SUBDIR

From 49549033fda91134d442a276c7326bd26f09c8ba Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 2 Feb 2010 19:58:05 +0000
Subject: [PATCH 037/315] unbreak compilation with vhook enabled

The backported symbol versioning patch introduced a regression that prevents
successfully linking vhook shared objects on systems with GNU linkers. As the
version scripts only apply to the shared libraries that applications are linked
against, this commit unties the VHOOKSHFLAGS and SHFLAGS variables.


Originally committed as revision 21609 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 229b84d21f..04fc73d0ce 100755
--- a/configure
+++ b/configure
@@ -1201,7 +1201,7 @@ vhook="default"
 # build settings
 add_cflags -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112
 SHFLAGS='-shared -Wl,-soname,$$(@F)'
-VHOOKSHFLAGS='$(SHFLAGS)'
+VHOOKSHFLAGS='-shared -Wl,-soname,$$(@F)'
 FFSERVERLDFLAGS=-Wl,-E
 LIBPREF="lib"
 LIBSUF=".a"

From de3196da60e5d87c110907aadbc49b925e60b5a7 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 2 Feb 2010 20:16:46 +0000
Subject: [PATCH 038/315] fix version script for libswscale

Originally committed as revision 21610 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libswscale/libswscale.v | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/libswscale.v b/libswscale/libswscale.v
index d6bd39a8c5..06e907a228 100644
--- a/libswscale/libswscale.v
+++ b/libswscale/libswscale.v
@@ -1,3 +1,3 @@
-LIBSWSCALE_%MAJOR% {
+LIBSWSCALE_$MAJOR {
         global: *;
 };

From 478394bab70785284af35237712f2e06da8b2937 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 2 Feb 2010 20:30:20 +0000
Subject: [PATCH 039/315] backport configure bits for cpu runtime detection for
 libpostproc and libswscale

Originally committed as revision 21611 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure                      |  3 +++
 libavcodec/ppc/check_altivec.c |  2 +-
 libpostproc/postprocess.c      | 14 +++++++-------
 libswscale/swscale.c           | 20 ++++++++++----------
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/configure b/configure
index 04fc73d0ce..fa9d670ef9 100755
--- a/configure
+++ b/configure
@@ -103,6 +103,7 @@ show_help(){
   echo "  --disable-golomb         disable Golomb code"
   echo "  --disable-mdct           disable MDCT code"
   echo "  --disable-rdft           disable RDFT code"
+  echo "  --enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)"
   echo "  --enable-hardcoded-tables use hardcoded tables instead of runtime generation"
   echo "  --enable-memalign-hack   emulate memalign, interferes with memory debuggers"
   echo "  --enable-beos-netserver  enable BeOS netserver"
@@ -805,6 +806,7 @@ CONFIG_LIST="
     postproc
     powerpc_perf
     rdft
+    runtime_cpudetect
     shared
     small
     static
@@ -2268,6 +2270,7 @@ if test "$extra_version" != ""; then
     echo "version string suffix     $extra_version"
 fi
 echo "big-endian                ${bigendian-no}"
+echo "runtime cpu detection     ${runtime_cpudetect-no}"
 if enabled x86; then
     echo "yasm                      ${yasm-no}"
     echo "MMX enabled               ${mmx-no}"
diff --git a/libavcodec/ppc/check_altivec.c b/libavcodec/ppc/check_altivec.c
index e034ceba41..08cc0f4374 100644
--- a/libavcodec/ppc/check_altivec.c
+++ b/libavcodec/ppc/check_altivec.c
@@ -63,7 +63,7 @@ int has_altivec(void)
 
     if (err == 0) return has_vu != 0;
     return 0;
-#elif defined(RUNTIME_CPUDETECT)
+#elif CONFIG_RUNTIME_CPUDETECT
     int proc_ver;
     // Support of mfspr PVR emulation added in Linux 2.6.17.
     __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver));
diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 4cfbaad7ca..b5b6649f93 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -554,7 +554,7 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 
 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
 //Plain C versions
-#if !(HAVE_MMX || HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
+#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_C
 #endif
 
@@ -564,15 +564,15 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 
 #if ARCH_X86
 
-#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
+#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_MMX
 #endif
 
-#if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
+#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_MMX2
 #endif
 
-#if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
+#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_3DNOW
 #endif
 #endif /* ARCH_X86 */
@@ -645,7 +645,7 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
     // Using ifs here as they are faster than function pointers although the
     // difference would not be measurable here but it is much better because
     // someone might exchange the CPU whithout restarting MPlayer ;)
-#ifdef RUNTIME_CPUDETECT
+#if CONFIG_RUNTIME_CPUDETECT
 #if ARCH_X86
     // ordered per speed fastest first
     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
@@ -664,7 +664,7 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
 #endif
             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #endif
-#else //RUNTIME_CPUDETECT
+#else //CONFIG_RUNTIME_CPUDETECT
 #if   HAVE_MMX2
             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #elif HAVE_AMD3DNOW
@@ -676,7 +676,7 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
 #else
             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #endif
-#endif //!RUNTIME_CPUDETECT
+#endif //!CONFIG_RUNTIME_CPUDETECT
 }
 
 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 4338acee03..0c089c810a 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -955,27 +955,27 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 
 //Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
 //Plain C versions
-#if ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
+#if ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_C
 #endif
 
 #if ARCH_PPC
-#if HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)
+#if HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_ALTIVEC
 #endif
 #endif //ARCH_PPC
 
 #if ARCH_X86
 
-#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
 #define COMPILE_MMX
 #endif
 
-#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
 #define COMPILE_MMX2
 #endif
 
-#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
 #define COMPILE_3DNOW
 #endif
 #endif //ARCH_X86
@@ -1636,7 +1636,7 @@ static void globalInit(void){
 
 static SwsFunc getSwsFunc(int flags){
 
-#if defined(RUNTIME_CPUDETECT)
+#if CONFIG_RUNTIME_CPUDETECT
 #if ARCH_X86 && CONFIG_GPL
     // ordered per speed fastest first
     if (flags & SWS_CPU_CAPS_MMX2)
@@ -1657,7 +1657,7 @@ static SwsFunc getSwsFunc(int flags){
 #endif
     return swScale_C;
 #endif /* ARCH_X86 && CONFIG_GPL */
-#else //RUNTIME_CPUDETECT
+#else //CONFIG_RUNTIME_CPUDETECT
 #if   HAVE_MMX2
     return swScale_MMX2;
 #elif HAVE_AMD3DNOW
@@ -1669,7 +1669,7 @@ static SwsFunc getSwsFunc(int flags){
 #else
     return swScale_C;
 #endif
-#endif //!RUNTIME_CPUDETECT
+#endif //!CONFIG_RUNTIME_CPUDETECT
 }
 
 static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
@@ -2193,7 +2193,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         __asm__ volatile("emms\n\t"::: "memory");
 #endif
 
-#if !defined(RUNTIME_CPUDETECT) //ensure that the flags match the compiled variant if cpudetect is off
+#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
     flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
 #if   HAVE_MMX2
     flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
@@ -2206,7 +2206,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
 #elif ARCH_BFIN
     flags |= SWS_CPU_CAPS_BFIN;
 #endif
-#endif /* RUNTIME_CPUDETECT */
+#endif /* CONFIG_RUNTIME_CPUDETECT */
     if (clip_table[512] != 255) globalInit();
     if (!rgb15to16) sws_rgb2rgb_init(flags);
 

From b18806f811db30cb8215f6508ab059a0ef2eacd3 Mon Sep 17 00:00:00 2001
From: Andres Mejia <mcitadel@gmail.com>
Date: Tue, 2 Feb 2010 20:44:47 +0000
Subject: [PATCH 040/315] Add gnu/kfreebsd to list of recognized operating
 systems. patch by Andres Mejia, mcitadel gmail com

Originally committed as revision 21612 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index fa9d670ef9..1b89283a62 100755
--- a/configure
+++ b/configure
@@ -1657,6 +1657,8 @@ case $target_os in
     interix)
         disable vhook
         ;;
+    gnu/kfreebsd)
+        ;;
 
     *)
         die "Unknown OS '$target_os'."

From e21e76a914c6194438ee649b06c9be2bed7bfca5 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 2 Feb 2010 20:48:21 +0000
Subject: [PATCH 041/315] build PIC code on powerpc

As a side effect, this avoids the failing 24-bit relocations seen in
http://bugs.debian.org/561956


Originally committed as revision 21613 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 1b89283a62..3e74ab8311 100755
--- a/configure
+++ b/configure
@@ -2219,7 +2219,7 @@ if enabled shared; then
     # LIBOBJFLAGS may have already been set in the OS configuration
     if test -z "$LIBOBJFLAGS" ; then
         case "${subarch-$arch}" in
-            x86_64|ia64|alpha|sparc*|power*|parisc*|mips*) LIBOBJFLAGS='$(PIC)' ;;
+            x86_64|ia64|alpha|sparc*|ppc|power*|parisc*|mips*) LIBOBJFLAGS='$(PIC)' ;;
         esac
     fi
 fi

From 7d061cfe175ceef84b9201490163e76b3a6f0e06 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 17:36:18 +0000
Subject: [PATCH 042/315] fix aac playback regression

Discussed at http://comments.gmane.org/gmane.comp.video.ffmpeg.devel/103768

related reports:
 - http://bugs.debian.org/540729
 - https://roundup.ffmpeg.org/roundup/ffmpeg/issue800


Originally committed as revision 21706 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/aac.c | 100 +++++++++++++++++++++++++++++++++++------------
 libavcodec/aac.h |   2 +
 2 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/libavcodec/aac.c b/libavcodec/aac.c
index 80195c0641..b6759dd763 100644
--- a/libavcodec/aac.c
+++ b/libavcodec/aac.c
@@ -97,6 +97,56 @@ static VLC vlc_scalefactors;
 static VLC vlc_spectral[11];
 
 
+static ChannelElement* get_che(AACContext *ac, int type, int elem_id) {
+    static const int8_t tags_per_config[16] = { 0, 1, 1, 2, 3, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0 };
+    if (ac->tag_che_map[type][elem_id]) {
+        return ac->tag_che_map[type][elem_id];
+    }
+    if (ac->tags_mapped >= tags_per_config[ac->m4ac.chan_config]) {
+        return NULL;
+    }
+    switch (ac->m4ac.chan_config) {
+        case 7:
+            if (ac->tags_mapped == 3 && type == TYPE_CPE) {
+                ac->tags_mapped++;
+                return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
+            }
+        case 6:
+            /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
+               instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
+               encountered such a stream, transfer the LFE[0] element to SCE[1] */
+            if (ac->tags_mapped == tags_per_config[ac->m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
+                ac->tags_mapped++;
+                return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
+            }
+        case 5:
+            if (ac->tags_mapped == 2 && type == TYPE_CPE) {
+                ac->tags_mapped++;
+                return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
+            }
+        case 4:
+            if (ac->tags_mapped == 2 && ac->m4ac.chan_config == 4 && type == TYPE_SCE) {
+                ac->tags_mapped++;
+                return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
+            }
+        case 3:
+        case 2:
+            if (ac->tags_mapped == (ac->m4ac.chan_config != 2) && type == TYPE_CPE) {
+                ac->tags_mapped++;
+                return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
+            } else if (ac->m4ac.chan_config == 2) {
+                return NULL;
+            }
+        case 1:
+            if (!ac->tags_mapped && type == TYPE_SCE) {
+                ac->tags_mapped++;
+                return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
+            }
+        default:
+            return NULL;
+    }
+}
+
 /**
  * Configure output channel order based on the current program configuration element.
  *
@@ -106,7 +156,7 @@ static VLC vlc_spectral[11];
  * @return  Returns error status. 0 - OK, !0 - error
  */
 static int output_configure(AACContext *ac, enum ChannelPosition che_pos[4][MAX_ELEM_ID],
-        enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]) {
+        enum ChannelPosition new_che_pos[4][MAX_ELEM_ID], int channel_config) {
     AVCodecContext *avctx = ac->avccontext;
     int i, type, channels = 0;
 
@@ -140,7 +190,16 @@ static int output_configure(AACContext *ac, enum ChannelPosition che_pos[4][MAX_
         }
     }
 
+    if (channel_config) {
+        memset(ac->tag_che_map, 0,       4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
+        ac->tags_mapped = 0;
+    } else {
+        memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
+        ac->tags_mapped = 4*MAX_ELEM_ID;
+    }
+
     avctx->channels = channels;
+
     return 0;
 }
 
@@ -286,7 +345,7 @@ static int decode_ga_specific_config(AACContext * ac, GetBitContext * gb, int ch
         if((ret = set_default_channel_config(ac, new_che_pos, channel_config)))
             return ret;
     }
-    if((ret = output_configure(ac, ac->che_pos, new_che_pos)))
+    if((ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config)))
         return ret;
 
     if (extension_flag) {
@@ -394,7 +453,7 @@ static av_cold int aac_decode_init(AVCodecContext * avccontext) {
         memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
         if(set_default_channel_config(ac, new_che_pos, avccontext->channels - (avccontext->channels == 8)))
             return -1;
-        if(output_configure(ac, ac->che_pos, new_che_pos))
+        if(output_configure(ac, ac->che_pos, new_che_pos, 1))
             return -1;
         ac->m4ac.sample_rate = avccontext->sample_rate;
     } else {
@@ -1538,19 +1597,20 @@ static int parse_adts_frame_header(AACContext * ac, GetBitContext * gb) {
         ac->m4ac.sample_rate     = hdr_info.sample_rate;
         ac->m4ac.sampling_index  = hdr_info.sampling_index;
         ac->m4ac.object_type     = hdr_info.object_type;
-    }
-    if (hdr_info.num_aac_frames == 1) {
-        if (!hdr_info.crc_absent)
-            skip_bits(gb, 16);
-    } else {
-        ff_log_missing_feature(ac->avccontext, "More than one AAC RDB per ADTS frame is", 0);
-        return -1;
+        if (hdr_info.num_aac_frames == 1) {
+            if (!hdr_info.crc_absent)
+                skip_bits(gb, 16);
+        } else {
+            ff_log_missing_feature(ac->avccontext, "More than one AAC RDB per ADTS frame is", 0);
+            return -1;
+        }
     }
     return size;
 }
 
 static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
     AACContext * ac = avccontext->priv_data;
+    ChannelElement * che = NULL;
     GetBitContext gb;
     enum RawDataBlockType elem_type;
     int err, elem_id, data_size_tmp;
@@ -1573,15 +1633,7 @@ static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data
         elem_id = get_bits(&gb, 4);
         err = -1;
 
-        if(elem_type == TYPE_SCE && elem_id == 1 &&
-                !ac->che[TYPE_SCE][elem_id] && ac->che[TYPE_LFE][0]) {
-            /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
-               instead of SCE[0] CPE[0] CPE[0] LFE[0]. If we seem to have
-               encountered such a stream, transfer the LFE[0] element to SCE[1] */
-            ac->che[TYPE_SCE][elem_id] = ac->che[TYPE_LFE][0];
-            ac->che[TYPE_LFE][0] = NULL;
-        }
-        if(elem_type < TYPE_DSE && !ac->che[elem_type][elem_id]) {
+        if(elem_type < TYPE_DSE && !(che=get_che(ac, elem_type, elem_id))) {
             av_log(ac->avccontext, AV_LOG_ERROR, "channel element %d.%d is not allocated\n", elem_type, elem_id);
             return -1;
         }
@@ -1589,19 +1641,19 @@ static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data
         switch (elem_type) {
 
         case TYPE_SCE:
-            err = decode_ics(ac, &ac->che[TYPE_SCE][elem_id]->ch[0], &gb, 0, 0);
+            err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
             break;
 
         case TYPE_CPE:
-            err = decode_cpe(ac, &gb, ac->che[TYPE_CPE][elem_id]);
+            err = decode_cpe(ac, &gb, che);
             break;
 
         case TYPE_CCE:
-            err = decode_cce(ac, &gb, ac->che[TYPE_CCE][elem_id]);
+            err = decode_cce(ac, &gb, che);
             break;
 
         case TYPE_LFE:
-            err = decode_ics(ac, &ac->che[TYPE_LFE][elem_id]->ch[0], &gb, 0, 0);
+            err = decode_ics(ac, &che->ch[0], &gb, 0, 0);
             break;
 
         case TYPE_DSE:
@@ -1615,7 +1667,7 @@ static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data
             memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
             if((err = decode_pce(ac, new_che_pos, &gb)))
                 break;
-            err = output_configure(ac, ac->che_pos, new_che_pos);
+            err = output_configure(ac, ac->che_pos, new_che_pos, 0);
             break;
         }
 
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 66b2e22d0e..32e7224c47 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -260,6 +260,8 @@ typedef struct {
                                                    *   first index as the first 4 raw data block types
                                                    */
     ChannelElement * che[4][MAX_ELEM_ID];
+    ChannelElement * tag_che_map[4][MAX_ELEM_ID];
+    int tags_mapped;
     /** @} */
 
     /**

From 1e9ac36f66560d94f252a176936c82763977a785 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 18:10:07 +0000
Subject: [PATCH 043/315] Make arguments of av_set_pts_info() unsigned. Fixes
 issue1240/mpeg1/smclockmpeg1.avi.3.1

Originally committed as revision 21707 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/avformat.h | 2 +-
 libavformat/utils.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 18ec2c86f9..6b871444b8 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1035,7 +1035,7 @@ AVChapter *ff_new_chapter(AVFormatContext *s, int id, AVRational time_base,
  * @param pts_den denominator to convert to seconds (MPEG: 90000)
  */
 void av_set_pts_info(AVStream *s, int pts_wrap_bits,
-                     int pts_num, int pts_den);
+                     unsigned int pts_num, unsigned int pts_den);
 
 #define AVSEEK_FLAG_BACKWARD 1 ///< seek backward
 #define AVSEEK_FLAG_BYTE     2 ///< seeking based on position in bytes
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 0ffe96a00b..223d567f75 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -3286,7 +3286,7 @@ char *ff_data_to_hex(char *buff, const uint8_t *src, int s)
 }
 
 void av_set_pts_info(AVStream *s, int pts_wrap_bits,
-                     int pts_num, int pts_den)
+                     unsigned int pts_num, unsigned int pts_den)
 {
     unsigned int gcd= av_gcd(pts_num, pts_den);
     s->pts_wrap_bits = pts_wrap_bits;

From 95f90d27d213fc3b9ca238a542728cd36a7583c7 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 18:24:30 +0000
Subject: [PATCH 044/315] Disable parsing for ogg streams where no ogg header
 was found; if no header was found, the parser was not initialized and thus
 will crash when trying to use it.

Originally committed as revision 21708 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/oggdec.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 28dc56eeb9..54406f5479 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -477,12 +477,17 @@ static int
 ogg_read_header (AVFormatContext * s, AVFormatParameters * ap)
 {
     struct ogg *ogg = s->priv_data;
+    int i;
     ogg->curidx = -1;
     //linear headers seek from start
     if (ogg_get_headers (s) < 0){
         return -1;
     }
 
+    for (i = 0; i < ogg->nstreams; i++)
+        if (ogg->streams[i].header < 0)
+            ogg->streams[i].codec = NULL;
+
     //linear granulepos seek from end
     ogg_get_length (s);
 

From e91ba7dc9d0c475f7b58b906033fa71826084d4b Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 18:41:17 +0000
Subject: [PATCH 045/315] add one missing check for stream existence in
 read_elst, fix #1364

backported patch r19792 by bcoudurier


Originally committed as revision 21709 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/mov.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index b34326c3cf..1beba9ef90 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1776,9 +1776,13 @@ free_and_return:
 /* edit list atom */
 static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    MOVStreamContext *sc = c->fc->streams[c->fc->nb_streams-1]->priv_data;
+    MOVStreamContext *sc;
     int i, edit_count;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    sc = c->fc->streams[c->fc->nb_streams-1]->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
     edit_count = get_be32(pb); /* entries */

From 7db16a81733e9380eaf85dd8db0e4080841243e4 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 18:44:49 +0000
Subject: [PATCH 046/315] check stream existence before assignment, fix #1222

backported r19259 by bcoudurier


Originally committed as revision 21710 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/mov.c | 136 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 107 insertions(+), 29 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 1beba9ef90..c55b8b2a35 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -238,10 +238,15 @@ static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     int entries, i, j;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_be32(pb); // version + flags
     entries = get_be32(pb);
     if (entries >= UINT_MAX / sizeof(*sc->drefs))
@@ -381,9 +386,13 @@ static const AVCodecTag mp4_audio_types[] = {
 
 static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
+    AVStream *st;
     int tag, len;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+
     get_be32(pb); /* version + flags */
     len = mp4_read_descr(c, pb, &tag);
     if (tag == MP4ESDescrTag) {
@@ -440,7 +449,12 @@ static int mov_read_pasp(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
     const int num = get_be32(pb);
     const int den = get_be32(pb);
-    AVStream * const st = c->fc->streams[c->fc->nb_streams-1];
+    AVStream *st;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+
     if (den != 0) {
         if ((st->sample_aspect_ratio.den != 1 || st->sample_aspect_ratio.num) && // default
             (den != st->sample_aspect_ratio.den || num != st->sample_aspect_ratio.num))
@@ -494,12 +508,18 @@ static int mov_read_moof(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
-    int version = get_byte(pb);
+    AVStream *st;
+    MOVStreamContext *sc;
+    int version;
     char language[4] = {0};
     unsigned lang;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
+    version = get_byte(pb);
     if (version > 1)
         return -1; /* unsupported */
 
@@ -561,7 +581,11 @@ static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
+    AVStream *st;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
 
     if((uint64_t)atom.size > (1<<30))
         return -1;
@@ -581,9 +605,14 @@ static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    int little_endian = get_be16(pb);
+    AVStream *st;
+    int little_endian;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+
+    little_endian = get_be16(pb);
     dprintf(c->fc, "enda %d\n", little_endian);
     if (little_endian == 1) {
         switch (st->codec->codec_id) {
@@ -633,7 +662,11 @@ static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
+    AVStream *st;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
 
     if((uint64_t)atom.size > (1<<30))
         return -1;
@@ -660,7 +693,11 @@ static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
  */
 static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
+    AVStream *st;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
 
     if((uint64_t)atom.size > (1<<30))
         return -1;
@@ -676,10 +713,15 @@ static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     unsigned int i, entries;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
 
@@ -742,10 +784,15 @@ static enum CodecID mov_get_lpcm_codec_id(int bps, int flags)
 
 static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     int j, entries, pseudo_stream_id;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
 
@@ -1064,10 +1111,15 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     unsigned int i, entries;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
 
@@ -1092,10 +1144,15 @@ static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     unsigned int i, entries;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
 
@@ -1119,10 +1176,15 @@ static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     unsigned int i, entries, sample_size;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
 
@@ -1150,12 +1212,17 @@ static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     unsigned int i, entries;
     int64_t duration=0;
     int64_t total_sample_count=0;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
     entries = get_be32(pb);
@@ -1194,10 +1261,15 @@ static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 
 static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
 {
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
+    AVStream *st;
+    MOVStreamContext *sc;
     unsigned int i, entries;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
     get_byte(pb); /* version */
     get_be24(pb); /* flags */
     entries = get_be32(pb);
@@ -1504,10 +1576,16 @@ static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
     int height;
     int64_t disp_transform[2];
     int display_matrix[3][2];
-    AVStream *st = c->fc->streams[c->fc->nb_streams-1];
-    MOVStreamContext *sc = st->priv_data;
-    int version = get_byte(pb);
+    AVStream *st;
+    MOVStreamContext *sc;
+    int version;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+    sc = st->priv_data;
+
+    version = get_byte(pb);
     get_be24(pb); /* flags */
     /*
     MOV_TRACK_ENABLED 0x0001

From ef84190a1ab777c35ea9fec64c3ab6ce641b79e5 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 18:51:11 +0000
Subject: [PATCH 047/315] Fix possible buffer over-read in vorbis_comment, fix
 it twice to be sure. First, make s signed, so that comparisons against end -
 p will not be made as unsigned, making the check incorrectly pass if p is
 beyond end. Also ensure that p will never be > end, so the code is correct
 even if buf is not padded.

backported r20014 by reimar


Originally committed as revision 21711 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/oggparsevorbis.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c
index 7c97807c01..82029daf93 100644
--- a/libavformat/oggparsevorbis.c
+++ b/libavformat/oggparsevorbis.c
@@ -35,27 +35,28 @@ vorbis_comment(AVFormatContext * as, uint8_t *buf, int size)
 {
     const uint8_t *p = buf;
     const uint8_t *end = buf + size;
-    unsigned s, n, j;
+    unsigned n, j;
+    int s;
 
     if (size < 8) /* must have vendor_length and user_comment_list_length */
         return -1;
 
     s = bytestream_get_le32(&p);
 
-    if (end - p < s)
+    if (end - p - 4 < s || s < 0)
         return -1;
 
     p += s;
 
     n = bytestream_get_le32(&p);
 
-    while (p < end && n > 0) {
+    while (end - p >= 4 && n > 0) {
         const char *t, *v;
         int tl, vl;
 
         s = bytestream_get_le32(&p);
 
-        if (end - p < s)
+        if (end - p < s || s < 0)
             break;
 
         t = p;

From c42640b20049517ce641e802935db28c7cdc32ae Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 18:55:41 +0000
Subject: [PATCH 048/315] Fix a possibly exploitable buffer overflow.

backported r18640 by michael


Originally committed as revision 21712 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/ffv1.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 72c5fbd881..ccfcb62dc4 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -248,10 +248,9 @@ static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
     else{
         int i, e, a;
         e= 0;
-        while(get_rac(c, state+1 + e)){ //1..10
+        while(get_rac(c, state+1 + e) && e<9){ //1..10
             e++;
         }
-        assert(e<=9);
 
         a= 1;
         for(i=e-1; i>=0; i--){

From acac7858bd5066cb2e56cc353c8ae54786c1f623 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 19:02:39 +0000
Subject: [PATCH 049/315] Check num_units_in_tick/time_scale to be valid and
 within the range we support. based on a patch by chrome

backported r19979 by michael


Originally committed as revision 21715 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/h264.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 91f20c9867..3ea61330ec 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -7016,6 +7016,10 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
     if(sps->timing_info_present_flag){
         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
         sps->time_scale = get_bits_long(&s->gb, 32);
+        if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
+            av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick inavlid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
+            return -1;
+        }
         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
     }
 

From ab546a7463814e052e9bc6f7cfbe1f2e5a38a9da Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 19:09:12 +0000
Subject: [PATCH 050/315] check data_size in decode_frame()

backported r19986 by michael


Originally committed as revision 21716 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/mpegaudiodec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index ce0066bbf7..f4fe71649c 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -2287,6 +2287,9 @@ retry:
     avctx->bit_rate = s->bit_rate;
     avctx->sub_id = s->layer;
 
+    if(*data_size < 1152*avctx->channels*sizeof(OUT_INT))
+        return -1;
+
     if(s->frame_size<=0 || s->frame_size > buf_size){
         av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
         return -1;

From 4b360ee2ca6a5e9b6ca6b862dc391d126e6366dc Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 19:20:25 +0000
Subject: [PATCH 051/315] Check data_size in decode_frame_mp3on4().

backported r19987 by michael


Originally committed as revision 21717 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/mpegaudiodec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index f4fe71649c..629872b305 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -2473,6 +2473,9 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
     OUT_INT *outptr, *bp;
     int fr, j, n;
 
+    if(*data_size < MPA_FRAME_SIZE * MPA_MAX_CHANNELS * s->frames * sizeof(OUT_INT))
+        return -1;
+
     *data_size = 0;
     // Discard too short frames
     if (buf_size < HEADER_SIZE)

From 3835603e7ee917cd95d611f0d663a8cad3466f75 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 19:22:19 +0000
Subject: [PATCH 052/315] Set data_size to 0 to avoid having it uninitialized.
 based on 31_mp3_outlen.patch by chrome.

backported r19988 by michael


Originally committed as revision 21718 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/mpegaudiodec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 629872b305..e7bbd5d541 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -2289,6 +2289,7 @@ retry:
 
     if(*data_size < 1152*avctx->channels*sizeof(OUT_INT))
         return -1;
+    *data_size = 0;
 
     if(s->frame_size<=0 || s->frame_size > buf_size){
         av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");

From 8811fe69877063ec1fc1ba518d33ad99e97cd83c Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:26:47 +0000
Subject: [PATCH 053/315] Fix init_get_bits() buffer size.
 18_fix_theora_header_bit_len.patch by chrome

backport r19993 by michael


Originally committed as revision 21719 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vp3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 64901616cc..07a791d3da 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -2231,7 +2231,7 @@ static av_cold int theora_decode_init(AVCodecContext *avctx)
     }
 
   for(i=0;i<3;i++) {
-    init_get_bits(&gb, header_start[i], header_len[i]);
+    init_get_bits(&gb, header_start[i], header_len[i] * 8);
 
     ptype = get_bits(&gb, 8);
 

From f3fd4286970c018d57e4c31423a71de82d16a149 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 19:31:04 +0000
Subject: [PATCH 054/315] Make sure that all memory allocations succeed. Based
 on 28_theora_malloc_checks.patch from the Google Chrome team.

backport r20008 by melanson


Originally committed as revision 21720 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vp3.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 07a791d3da..f30c060e17 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -43,6 +43,8 @@
 
 #define FRAGMENT_PIXELS 8
 
+static av_cold int vp3_decode_end(AVCodecContext *avctx);
+
 typedef struct Coeff {
     struct Coeff *next;
     DCTELEM coeff;
@@ -1684,6 +1686,11 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
     s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
     s->pixel_addresses_initialized = 0;
+    if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts ||
+        !s->coeffs || !s->coded_fragment_list) {
+        vp3_decode_end(avctx);
+        return -1;
+    }
 
     if (!s->theora_tables)
     {
@@ -1784,6 +1791,11 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     s->superblock_macroblocks = av_malloc(s->superblock_count * 4 * sizeof(int));
     s->macroblock_fragments = av_malloc(s->macroblock_count * 6 * sizeof(int));
     s->macroblock_coding = av_malloc(s->macroblock_count + 1);
+    if (!s->superblock_fragments || !s->superblock_macroblocks ||
+        !s->macroblock_fragments || !s->macroblock_coding) {
+        vp3_decode_end(avctx);
+        return -1;
+    }
     init_block_mapping(s);
 
     for (i = 0; i < 3; i++) {

From beb0dc5f362a2789978c95f783e530940b1c905d Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 19:42:30 +0000
Subject: [PATCH 055/315] Check dimensions against 0 too.
 39_vorbis_zero_dims.patch from chrome

backport r19976 by michael


Originally committed as revision 21721 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 6ca8763bb3..a1e33babc6 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -249,8 +249,8 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) {
         }
 
         codebook_setup->dimensions=get_bits(gb, 16);
-        if (codebook_setup->dimensions>16) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook's dimension is too large (%d). \n", cb, codebook_setup->dimensions);
+        if (codebook_setup->dimensions>16||codebook_setup->dimensions==0) {
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook's dimension is invalid (%d). \n", cb, codebook_setup->dimensions);
             goto error;
         }
         entries=get_bits(gb, 24);

From 9ef13f70f4d38514fa82b998f7e62abb7940f4c1 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:44:06 +0000
Subject: [PATCH 056/315] = -> == typo. 27_vorbis_residue_loop_error.patch by
 chrome

backport r19982 by michael


Originally committed as revision 21722 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index a1e33babc6..cbde097b84 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -1492,7 +1492,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
         uint_fast8_t ch=0;
 
         for(j=0;j<vc->audio_channels;++j) {
-            if ((mapping->submaps==1) || (i=mapping->mux[j])) {
+            if ((mapping->submaps==1) || (i==mapping->mux[j])) {
                 res_chan[j]=res_num;
                 if (no_residue[j]) {
                     do_not_decode[ch]=1;

From 0d3f25eb483f19a34e3354970333b87e04120ae2 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:45:16 +0000
Subject: [PATCH 057/315] Sanity checks for magnitude and angle.
 26_vorbis_mag_angle_index.patch by chrome

backport r19983 by michael


Originally committed as revision 21723 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index cbde097b84..305ceadaa1 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -702,7 +702,14 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) {
             for(j=0;j<mapping_setup->coupling_steps;++j) {
                 mapping_setup->magnitude[j]=get_bits(gb, ilog(vc->audio_channels-1));
                 mapping_setup->angle[j]=get_bits(gb, ilog(vc->audio_channels-1));
-                // FIXME: sanity checks
+                if (mapping_setup->magnitude[j]>=vc->audio_channels) {
+                    av_log(vc->avccontext, AV_LOG_ERROR, "magnitude channel %d out of range. \n", mapping_setup->magnitude[j]);
+                    return 1;
+                }
+                if (mapping_setup->angle[j]>=vc->audio_channels) {
+                    av_log(vc->avccontext, AV_LOG_ERROR, "angle channel %d out of range. \n", mapping_setup->angle[j]);
+                    return 1;
+                }
             }
         } else {
             mapping_setup->coupling_steps=0;

From 49487dfd780bf0e33ca9aec4210f6f1eef8d8ba7 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:46:22 +0000
Subject: [PATCH 058/315] Fix book_idx check. 25_vorbis_floor0_index.patch by
 chrome.

backport r19984 by michael


Originally committed as revision 21724 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 305ceadaa1..29e7bdbf41 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -558,12 +558,11 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
                 uint_fast8_t book_idx;
                 for (idx=0;idx<floor_setup->data.t0.num_books;++idx) {
                     book_idx=get_bits(gb, 8);
+                    if (book_idx>=vc->codebook_count)
+                        return 1;
                     floor_setup->data.t0.book_list[idx]=book_idx;
                     if (vc->codebooks[book_idx].dimensions > max_codebook_dim)
                         max_codebook_dim=vc->codebooks[book_idx].dimensions;
-
-                    if (floor_setup->data.t0.book_list[idx]>vc->codebook_count)
-                        return 1;
                 }
             }
 

From b8ec4c49bd751f23aa175dbb64ff8d75edb7b917 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:47:42 +0000
Subject: [PATCH 059/315] Check classbook value.
 11_vorbis_residue_book_index.patch by chrome.

r19989 by michael


Originally committed as revision 21725 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 29e7bdbf41..4410fc04c3 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -635,6 +635,10 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc){
         res_setup->partition_size=get_bits(gb, 24)+1;
         res_setup->classifications=get_bits(gb, 6)+1;
         res_setup->classbook=get_bits(gb, 8);
+        if (res_setup->classbook>=vc->codebook_count) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "classbook value %d out of range. \n", res_setup->classbook);
+            return 1;
+        }
 
         AV_DEBUG("    begin %d end %d part.size %d classif.s %d classbook %d \n", res_setup->begin, res_setup->end, res_setup->partition_size,
           res_setup->classifications, res_setup->classbook);

From eb70d77e1ebe3104b83506dcb5708b581bb09a00 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:49:28 +0000
Subject: [PATCH 060/315] Add checks for per-packet mode indexes and per-header
 mode mapping indexes. 12_vorbis_mode_indexes.patch by chrome; possibly
 exploitable

r19990 by michael


Originally committed as revision 21726 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 4410fc04c3..bd8002eaf6 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -798,7 +798,11 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) {
         mode_setup->blockflag=get_bits1(gb);
         mode_setup->windowtype=get_bits(gb, 16); //FIXME check
         mode_setup->transformtype=get_bits(gb, 16); //FIXME check
-        mode_setup->mapping=get_bits(gb, 8); //FIXME check
+        mode_setup->mapping=get_bits(gb, 8);
+        if (mode_setup->mapping>=vc->mapping_count) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "mode mapping value %d out of range. \n", mode_setup->mapping);
+            return 1;
+        }
 
         AV_DEBUG(" %d mode: blockflag %d, windowtype %d, transformtype %d, mapping %d \n", i, mode_setup->blockflag, mode_setup->windowtype, mode_setup->transformtype, mode_setup->mapping);
     }
@@ -1458,6 +1462,10 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
     } else {
         mode_number=get_bits(gb, ilog(vc->mode_count-1));
     }
+    if (mode_number>=vc->mode_count) {
+        av_log(vc->avccontext, AV_LOG_ERROR, "mode number %d out of range.\n", mode_number);
+        return -1;
+    }
     vc->mode_number=mode_number;
     mapping=&vc->mappings[vc->modes[mode_number].mapping];
 

From dc5cc27d5ade462e7b2aa617285a4d64ee4000d7 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:50:47 +0000
Subject: [PATCH 061/315] Check masterbook index and subclass book index.
 14_floor_masterbook_index.patch by chrome

r19991 by michael


Originally committed as revision 21727 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index bd8002eaf6..cfcb29c42d 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -492,13 +492,23 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
                 AV_DEBUG(" %d floor %d class dim: %d subclasses %d \n", i, j, floor_setup->data.t1.class_dimensions[j], floor_setup->data.t1.class_subclasses[j]);
 
                 if (floor_setup->data.t1.class_subclasses[j]) {
-                    floor_setup->data.t1.class_masterbook[j]=get_bits(gb, 8);
+                    int bits=get_bits(gb, 8);
+                    if (bits>=vc->codebook_count) {
+                        av_log(vc->avccontext, AV_LOG_ERROR, "Masterbook index %d is out of range.\n", bits);
+                        return 1;
+                    }
+                    floor_setup->data.t1.class_masterbook[j]=bits;
 
                     AV_DEBUG("   masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]);
                 }
 
                 for(k=0;k<(1<<floor_setup->data.t1.class_subclasses[j]);++k) {
-                    floor_setup->data.t1.subclass_books[j][k]=(int16_t)get_bits(gb, 8)-1;
+                    int16_t bits=get_bits(gb, 8)-1;
+                    if (bits!=-1 && bits>=vc->codebook_count) {
+                        av_log(vc->avccontext, AV_LOG_ERROR, "Subclass book index %d is out of range.\n", bits);
+                        return 1;
+                    }
+                    floor_setup->data.t1.subclass_books[j][k]=bits;
 
                     AV_DEBUG("    book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]);
                 }

From 736d36b79231d72699dce3449fc9d80c46d2aa19 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:52:27 +0000
Subject: [PATCH 062/315] Check  res_setup->books.
 15_more_residue_book_indexes.patch by chrome.

r19992 by michael


Originally committed as revision 21728 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index cfcb29c42d..e596d48d2c 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -668,7 +668,12 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc){
         for(j=0;j<res_setup->classifications;++j) {
             for(k=0;k<8;++k) {
                 if (cascade[j]&(1<<k)) {
-                        res_setup->books[j][k]=get_bits(gb, 8);
+                    int bits=get_bits(gb, 8);
+                    if (bits>=vc->codebook_count) {
+                        av_log(vc->avccontext, AV_LOG_ERROR, "book value %d out of range. \n", bits);
+                        return 1;
+                    }
+                    res_setup->books[j][k]=bits;
 
                     AV_DEBUG("     %d class casscade depth %d book: %d \n", j, k, res_setup->books[j][k]);
 

From 4f5ee3f87b143587309471e1c0fa804847939f65 Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:54:42 +0000
Subject: [PATCH 063/315] Check begin/end/partition_size.
 23_vorbis_sane_partition.patch by chrome. Also this should be better
 documented but i prefer not to leave potential security issues open due to
 missing documentation.

r19996 by michael


Originally committed as revision 21729 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index e596d48d2c..b70d5a51c4 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -37,6 +37,7 @@
 #define V_NB_BITS 8
 #define V_NB_BITS2 11
 #define V_MAX_VLCS (1<<16)
+#define V_MAX_PARTITIONS (1<<20)
 
 #ifndef V_DEBUG
 #define AV_DEBUG(...)
@@ -643,6 +644,14 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc){
         res_setup->begin=get_bits(gb, 24);
         res_setup->end=get_bits(gb, 24);
         res_setup->partition_size=get_bits(gb, 24)+1;
+        /* Validations to prevent a buffer overflow later. */
+        if (res_setup->begin>res_setup->end
+        || res_setup->end>vc->blocksize[1]/(res_setup->type==2?1:2)
+        || (res_setup->end-res_setup->begin)/res_setup->partition_size>V_MAX_PARTITIONS) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "partition out of bounds: type, begin, end, size, blocksize: %d, %d, %d, %d, %d\n", res_setup->type, res_setup->begin, res_setup->end, res_setup->partition_size, vc->blocksize[1]/2);
+            return 1;
+        }
+
         res_setup->classifications=get_bits(gb, 6)+1;
         res_setup->classbook=get_bits(gb, 8);
         if (res_setup->classbook>=vc->codebook_count) {

From 9e3935dfd8c68608534206859b16239a652db37f Mon Sep 17 00:00:00 2001
From: Google Chrome <>
Date: Tue, 9 Feb 2010 19:59:11 +0000
Subject: [PATCH 064/315] Check submap indexes. 10_vorbis_submap_indexes.patch
 by chrome. I am applying this even though Reimar had some comments to improve
 it as it fixes a serious security issue and I do not want to leave such
 things unfixed.

backport r20001 by michael


Originally committed as revision 21730 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index b70d5a51c4..6cfdf48ee7 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -757,9 +757,20 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) {
         }
 
         for(j=0;j<mapping_setup->submaps;++j) {
+            int bits;
             skip_bits(gb, 8); // FIXME check?
-            mapping_setup->submap_floor[j]=get_bits(gb, 8);
-            mapping_setup->submap_residue[j]=get_bits(gb, 8);
+            bits=get_bits(gb, 8);
+            if (bits>=vc->floor_count) {
+                av_log(vc->avccontext, AV_LOG_ERROR, "submap floor value %d out of range. \n", bits);
+                return -1;
+            }
+            mapping_setup->submap_floor[j]=bits;
+            bits=get_bits(gb, 8);
+            if (bits>=vc->residue_count) {
+                av_log(vc->avccontext, AV_LOG_ERROR, "submap residue value %d out of range. \n", bits);
+                return -1;
+            }
+            mapping_setup->submap_residue[j]=bits;
 
             AV_DEBUG("   %d mapping %d submap : floor %d, residue %d \n", i, j, mapping_setup->submap_floor[j], mapping_setup->submap_residue[j]);
         }

From e5bea45df76a4888e37bc33f8cc803ec15d3ab7b Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 9 Feb 2010 20:28:42 +0000
Subject: [PATCH 065/315] Add a lock manager API to libavcodec.

Allows an application to register a callback that manages mutexes
on behalf of FFmpeg.
With this callback registered FFmpeg is fully thread safe.

backport r19025 by andoma

NB: This is a feature backport with little regression potential. It was
requested at FOSDEM 2010 by ben@geexbox.org for use by geexbox and the
enna mediacenter in the upcoming debian/squeeze and ubuntu/lucid
release.

Approved by DonDiego on #ffmpeg-devel


Originally committed as revision 21731 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog            |  2 +-
 doc/APIchanges       |  8 ++++++++
 libavcodec/avcodec.h | 28 +++++++++++++++++++++++++++-
 libavcodec/utils.c   | 40 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/Changelog b/Changelog
index 3d13d8de80..a3d3da02e7 100644
--- a/Changelog
+++ b/Changelog
@@ -12,7 +12,7 @@ version 0.5.1:
 - (L)GPL license upgrade support
 - AMR-NB decoding/encoding, AMR-WB decoding via OpenCORE libraries
 - enable symbol versioning by default for linkers that support it
-
+- backport av_lockmgr_register(), see doc/APIchanges for details
 
 
 version 0.5:
diff --git a/doc/APIchanges b/doc/APIchanges
index 07e680a04d..db7e245a09 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -1,3 +1,11 @@
+20090601 - r19025 - lavc 52.30.0 - av_lockmgr_register()
+  av_lockmgr_register() can be used to register a callback function
+  that lavc (and in the future, libraries that depend on lavc) can use
+  to implement mutexes. The application should provide a callback function
+  the implements the AV_LOCK_* operations described in avcodec.h.
+  When the lock manager is registered FFmpeg is guaranteed to behave
+  correct also in a multi-threaded application.
+
 20090301 - r17682 - lavf 52.31.0 - Generic metadata API
   This version introduce a new metadata API (see av_metadata_get() and friends).
   The old API is now deprecated and shouldn't be used anymore. This especially
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index b078bc98c2..0794fbef68 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,7 +30,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 20
+#define LIBAVCODEC_VERSION_MINOR 21
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -3344,4 +3344,30 @@ void av_register_hwaccel(AVHWAccel *hwaccel);
  */
 AVHWAccel *av_hwaccel_next(AVHWAccel *hwaccel);
 
+
+/**
+ * Lock operation used by lockmgr
+ */
+enum AVLockOp {
+  AV_LOCK_CREATE,  ///< Create a mutex
+  AV_LOCK_OBTAIN,  ///< Lock the mutex
+  AV_LOCK_RELEASE, ///< Unlock the mutex
+  AV_LOCK_DESTROY, ///< Free mutex resources
+};
+
+/**
+ * Register a user provided lock manager supporting the operations
+ * specified by AVLockOp. \p mutex points to a (void *) where the
+ * lockmgr should store/get a pointer to a user allocated mutex. It's
+ * NULL upon AV_LOCK_CREATE and != NULL for all other ops.
+ *
+ * @param cb User defined callback. Note: FFmpeg may invoke calls to this
+ *           callback during the call to av_lockmgr_register().
+ *           Thus, the application must be prepared to handle that.
+ *           If cb is set to NULL the lockmgr will be unregistered.
+ *           Also note that during unregistration the previously registered
+ *           lockmgr callback may also be invoked.
+ */
+int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op));
+
 #endif /* AVCODEC_AVCODEC_H */
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index b6412a5313..f628774cd7 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -65,6 +65,8 @@ const uint8_t ff_reverse[256]={
 };
 
 static int volatile entangled_thread_counter=0;
+int (*ff_lockmgr_cb)(void **mutex, enum AVLockOp op);
+static void *codec_mutex;
 
 void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size)
 {
@@ -420,6 +422,12 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec)
 {
     int ret= -1;
 
+    /* If there is a user-supplied mutex locking routine, call it. */
+    if (ff_lockmgr_cb) {
+        if ((*ff_lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
+            return -1;
+    }
+
     entangled_thread_counter++;
     if(entangled_thread_counter != 1){
         av_log(avctx, AV_LOG_ERROR, "insufficient thread locking around avcodec_open/close()\n");
@@ -464,6 +472,11 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec)
     ret=0;
 end:
     entangled_thread_counter--;
+
+    /* Release any user-supplied mutex. */
+    if (ff_lockmgr_cb) {
+        (*ff_lockmgr_cb)(&codec_mutex, AV_LOCK_RELEASE);
+    }
     return ret;
 }
 
@@ -583,6 +596,12 @@ int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
 
 int avcodec_close(AVCodecContext *avctx)
 {
+    /* If there is a user-supplied mutex locking routine, call it. */
+    if (ff_lockmgr_cb) {
+        if ((*ff_lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
+            return -1;
+    }
+
     entangled_thread_counter++;
     if(entangled_thread_counter != 1){
         av_log(avctx, AV_LOG_ERROR, "insufficient thread locking around avcodec_open/close()\n");
@@ -598,6 +617,11 @@ int avcodec_close(AVCodecContext *avctx)
     av_freep(&avctx->priv_data);
     avctx->codec = NULL;
     entangled_thread_counter--;
+
+    /* Release any user-supplied mutex. */
+    if (ff_lockmgr_cb) {
+        (*ff_lockmgr_cb)(&codec_mutex, AV_LOCK_RELEASE);
+    }
     return 0;
 }
 
@@ -1152,3 +1176,19 @@ AVHWAccel *ff_find_hwaccel(enum CodecID codec_id, enum PixelFormat pix_fmt)
     }
     return NULL;
 }
+
+int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op))
+{
+    if (ff_lockmgr_cb) {
+        if (ff_lockmgr_cb(&codec_mutex, AV_LOCK_DESTROY))
+            return -1;
+    }
+
+    ff_lockmgr_cb = cb;
+
+    if (ff_lockmgr_cb) {
+        if (ff_lockmgr_cb(&codec_mutex, AV_LOCK_CREATE))
+            return -1;
+    }
+    return 0;
+}

From afc97d47358f8c4ce0e9989bcd308806d3770409 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 11 Feb 2010 11:52:59 +0000
Subject: [PATCH 066/315] reverting objected hunks from previous commit

as discussed with diego on irc, the spurious newline deletion and the
LIBAVCODEC_VERSION_MINOR bump are being reverted based on comments on
ffmpeg-cvslog by ramiro, uoti and michael.

See http://comments.gmane.org/gmane.comp.video.ffmpeg.cvs/28112 for the
full context.


Originally committed as revision 21755 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog            | 1 +
 libavcodec/avcodec.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Changelog b/Changelog
index a3d3da02e7..abcdac3a3e 100644
--- a/Changelog
+++ b/Changelog
@@ -15,6 +15,7 @@ version 0.5.1:
 - backport av_lockmgr_register(), see doc/APIchanges for details
 
 
+
 version 0.5:
 
 - The "device" muxers and demuxers are now in a new libavdevice library
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 0794fbef68..835b589ded 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,7 +30,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 21
+#define LIBAVCODEC_VERSION_MINOR 20
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \

From 9d442d2d7d24cae9eeaba1f8eee7ec58a4e7d272 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 11 Feb 2010 16:22:59 +0000
Subject: [PATCH 067/315] Fix crash when max_ref_frames was out of range. This
 might have been exploitable. Fixes first crash of issue840.

backport r18388 by michael


Originally committed as revision 21757 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/snow.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index b4a0d5a8fd..5a8bcb8fb6 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -3554,7 +3554,7 @@ static void decode_qlogs(SnowContext *s){
 }
 
 static int decode_header(SnowContext *s){
-    int plane_index;
+    int plane_index, tmp;
     uint8_t kstate[32];
 
     memset(kstate, MID_STATE, sizeof(kstate));
@@ -3583,7 +3583,12 @@ static int decode_header(SnowContext *s){
         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
         s->spatial_scalability= get_rac(&s->c, s->header_state);
 //        s->rate_scalability= get_rac(&s->c, s->header_state);
-        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
+        tmp= get_symbol(&s->c, s->header_state, 0)+1;
+        if(tmp < 1 || tmp > MAX_REF_FRAMES){
+            av_log(s->avctx, AV_LOG_ERROR, "reference frame count is %d\n", tmp);
+            return -1;
+        }
+        s->max_ref_frames= tmp;
 
         decode_qlogs(s);
     }
@@ -3649,6 +3654,7 @@ static av_cold int common_init(AVCodecContext *avctx){
     int i, j;
 
     s->avctx= avctx;
+    s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
 
     dsputil_init(&s->dsp, avctx);
 

From 48b98cdc677370b8251b0491820ce695b7d76a23 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 11 Feb 2010 20:57:49 +0000
Subject: [PATCH 068/315] Make sure the block array is of the correct size.
 This might have been exploitable.

backported r18393 by michael


Originally committed as revision 21758 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/snow.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 5a8bcb8fb6..fbffce8429 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1626,6 +1626,7 @@ static int alloc_blocks(SnowContext *s){
     s->b_width = w;
     s->b_height= h;
 
+    av_free(s->block);
     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
     return 0;
 }
@@ -4515,7 +4516,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const
                                               && p->hcoeff[2]==2;
     }
 
-    if(!s->block) alloc_blocks(s);
+    alloc_blocks(s);
 
     frame_start(s);
     //keyframe flag duplication mess FIXME

From 9593c80062e97b819a016dc36b6060d29667efaf Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 11 Feb 2010 21:03:30 +0000
Subject: [PATCH 069/315] Fix crash in MLP decoder due to integer overflow.
 Probably only DoS, init_get_bits sets buffer to NULL, thus causing a
 NULL-dereference directly after.

backport r21426 by reimar


Originally committed as revision 21759 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/mlpdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 99754b329c..f1668afea0 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -878,7 +878,7 @@ static int read_access_unit(AVCodecContext *avctx, void* data, int *data_size,
 
     length = (AV_RB16(buf) & 0xfff) * 2;
 
-    if (length > buf_size)
+    if (length < 4 || length > buf_size)
         return -1;
 
     init_get_bits(&gb, (buf + 4), (length - 4) * 8);

From 26f74e832b9fa0bd931d555d74adb6aa9be126ae Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 15 Feb 2010 12:45:14 +0000
Subject: [PATCH 070/315] cosmetics: K&R coding style, prettyprinting

backported r20083 by diego

This commit does not introduce functional changes.  It was applied in
order to facilitate reviewing the proposed libx264.c backport.


Originally committed as revision 21832 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/libx264.c | 213 +++++++++++++++++++++----------------------
 1 file changed, 105 insertions(+), 108 deletions(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index d82756b62f..d2d3b270d0 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -27,14 +27,13 @@
 #include <string.h>
 
 typedef struct X264Context {
-    x264_param_t params;
-    x264_t *enc;
-    x264_picture_t pic;
-    AVFrame out_pic;
+    x264_param_t    params;
+    x264_t         *enc;
+    x264_picture_t  pic;
+    AVFrame         out_pic;
 } X264Context;
 
-static void
-X264_log(void *p, int level, const char *fmt, va_list args)
+static void X264_log(void *p, int level, const char *fmt, va_list args)
 {
     static const int level_map[] = {
         [X264_LOG_ERROR]   = AV_LOG_ERROR,
@@ -43,20 +42,19 @@ X264_log(void *p, int level, const char *fmt, va_list args)
         [X264_LOG_DEBUG]   = AV_LOG_DEBUG
     };
 
-    if(level < 0 || level > X264_LOG_DEBUG)
+    if (level < 0 || level > X264_LOG_DEBUG)
         return;
 
     av_vlog(p, level_map[level], fmt, args);
 }
 
 
-static int
-encode_nals(uint8_t *buf, int size, x264_nal_t *nals, int nnal)
+static int encode_nals(uint8_t *buf, int size, x264_nal_t *nals, int nnal)
 {
     uint8_t *p = buf;
     int i;
 
-    for(i = 0; i < nnal; i++){
+    for (i = 0; i < nnal; i++) {
         int s = x264_nal_encode(p, &size, 1, nals + i);
         if(s < 0)
             return -1;
@@ -66,8 +64,8 @@ encode_nals(uint8_t *buf, int size, x264_nal_t *nals, int nnal)
     return p - buf;
 }
 
-static int
-X264_frame(AVCodecContext *ctx, uint8_t *buf, int bufsize, void *data)
+static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
+                      int bufsize, void *data)
 {
     X264Context *x4 = ctx->priv_data;
     AVFrame *frame = data;
@@ -75,31 +73,31 @@ X264_frame(AVCodecContext *ctx, uint8_t *buf, int bufsize, void *data)
     int nnal, i;
     x264_picture_t pic_out;
 
-    x4->pic.img.i_csp = X264_CSP_I420;
+    x4->pic.img.i_csp   = X264_CSP_I420;
     x4->pic.img.i_plane = 3;
 
     if (frame) {
-        for(i = 0; i < 3; i++){
+        for (i = 0; i < 3; i++) {
             x4->pic.img.plane[i] = frame->data[i];
             x4->pic.img.i_stride[i] = frame->linesize[i];
         }
 
-        x4->pic.i_pts = frame->pts;
+        x4->pic.i_pts  = frame->pts;
         x4->pic.i_type = X264_TYPE_AUTO;
     }
 
-    if(x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL,
-                           &pic_out))
+    if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL,
+                            &pic_out))
         return -1;
 
     bufsize = encode_nals(buf, bufsize, nal, nnal);
-    if(bufsize < 0)
+    if (bufsize < 0)
         return -1;
 
     /* FIXME: dts */
     x4->out_pic.pts = pic_out.i_pts;
 
-    switch(pic_out.i_type){
+    switch (pic_out.i_type) {
     case X264_TYPE_IDR:
     case X264_TYPE_I:
         x4->out_pic.pict_type = FF_I_TYPE;
@@ -114,174 +112,173 @@ X264_frame(AVCodecContext *ctx, uint8_t *buf, int bufsize, void *data)
     }
 
     x4->out_pic.key_frame = pic_out.i_type == X264_TYPE_IDR;
-    x4->out_pic.quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
+    x4->out_pic.quality   = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
 
     return bufsize;
 }
 
-static av_cold int
-X264_close(AVCodecContext *avctx)
+static av_cold int X264_close(AVCodecContext *avctx)
 {
     X264Context *x4 = avctx->priv_data;
 
     av_freep(&avctx->extradata);
 
-    if(x4->enc)
+    if (x4->enc)
         x264_encoder_close(x4->enc);
 
     return 0;
 }
 
-static av_cold int
-X264_init(AVCodecContext *avctx)
+static av_cold int X264_init(AVCodecContext *avctx)
 {
     X264Context *x4 = avctx->priv_data;
 
     x264_param_default(&x4->params);
 
-    x4->params.pf_log = X264_log;
-    x4->params.p_log_private = avctx;
+    x4->params.pf_log               = X264_log;
+    x4->params.p_log_private        = avctx;
 
-    x4->params.i_keyint_max = avctx->gop_size;
-    x4->params.rc.i_bitrate = avctx->bit_rate / 1000;
+    x4->params.i_keyint_max         = avctx->gop_size;
+    x4->params.rc.i_bitrate         = avctx->bit_rate       / 1000;
     x4->params.rc.i_vbv_buffer_size = avctx->rc_buffer_size / 1000;
-    x4->params.rc.i_vbv_max_bitrate = avctx->rc_max_rate / 1000;
-    x4->params.rc.b_stat_write = avctx->flags & CODEC_FLAG_PASS1;
-    if(avctx->flags & CODEC_FLAG_PASS2) x4->params.rc.b_stat_read = 1;
-    else{
-        if(avctx->crf){
-            x4->params.rc.i_rc_method = X264_RC_CRF;
+    x4->params.rc.i_vbv_max_bitrate = avctx->rc_max_rate    / 1000;
+    x4->params.rc.b_stat_write      = avctx->flags & CODEC_FLAG_PASS1;
+    if (avctx->flags & CODEC_FLAG_PASS2) {
+        x4->params.rc.b_stat_read = 1;
+    } else {
+        if (avctx->crf) {
+            x4->params.rc.i_rc_method   = X264_RC_CRF;
             x4->params.rc.f_rf_constant = avctx->crf;
-        }else if(avctx->cqp > -1){
-            x4->params.rc.i_rc_method = X264_RC_CQP;
+        } else if (avctx->cqp > -1) {
+            x4->params.rc.i_rc_method   = X264_RC_CQP;
             x4->params.rc.i_qp_constant = avctx->cqp;
         }
     }
 
     // if neither crf nor cqp modes are selected we have to enable the RC
     // we do it this way because we cannot check if the bitrate has been set
-    if(!(avctx->crf || (avctx->cqp > -1))) x4->params.rc.i_rc_method = X264_RC_ABR;
+    if (!(avctx->crf || (avctx->cqp > -1)))
+        x4->params.rc.i_rc_method = X264_RC_ABR;
 
-    x4->params.i_bframe = avctx->max_b_frames;
-    x4->params.b_cabac = avctx->coder_type == FF_CODER_TYPE_AC;
+    x4->params.i_bframe          = avctx->max_b_frames;
+    x4->params.b_cabac           = avctx->coder_type == FF_CODER_TYPE_AC;
     x4->params.i_bframe_adaptive = avctx->b_frame_strategy;
-    x4->params.i_bframe_bias = avctx->bframebias;
-    x4->params.b_bframe_pyramid = avctx->flags2 & CODEC_FLAG2_BPYRAMID;
-    avctx->has_b_frames= avctx->flags2 & CODEC_FLAG2_BPYRAMID ? 2 : !!avctx->max_b_frames;
+    x4->params.i_bframe_bias     = avctx->bframebias;
+    x4->params.b_bframe_pyramid  = avctx->flags2 & CODEC_FLAG2_BPYRAMID;
+    avctx->has_b_frames          = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? 2 : !!avctx->max_b_frames;
 
     x4->params.i_keyint_min = avctx->keyint_min;
-    if(x4->params.i_keyint_min > x4->params.i_keyint_max)
+    if (x4->params.i_keyint_min > x4->params.i_keyint_max)
         x4->params.i_keyint_min = x4->params.i_keyint_max;
 
-    x4->params.i_scenecut_threshold = avctx->scenechange_threshold;
+    x4->params.i_scenecut_threshold        = avctx->scenechange_threshold;
 
-    x4->params.b_deblocking_filter = avctx->flags & CODEC_FLAG_LOOP_FILTER;
+    x4->params.b_deblocking_filter         = avctx->flags & CODEC_FLAG_LOOP_FILTER;
     x4->params.i_deblocking_filter_alphac0 = avctx->deblockalpha;
-    x4->params.i_deblocking_filter_beta = avctx->deblockbeta;
+    x4->params.i_deblocking_filter_beta    = avctx->deblockbeta;
 
-    x4->params.rc.i_qp_min = avctx->qmin;
-    x4->params.rc.i_qp_max = avctx->qmax;
-    x4->params.rc.i_qp_step = avctx->max_qdiff;
+    x4->params.rc.i_qp_min                 = avctx->qmin;
+    x4->params.rc.i_qp_max                 = avctx->qmax;
+    x4->params.rc.i_qp_step                = avctx->max_qdiff;
 
-    x4->params.rc.f_qcompress = avctx->qcompress;  /* 0.0 => cbr, 1.0 => constant qp */
-    x4->params.rc.f_qblur = avctx->qblur;        /* temporally blur quants */
+    x4->params.rc.f_qcompress       = avctx->qcompress; /* 0.0 => cbr, 1.0 => constant qp */
+    x4->params.rc.f_qblur           = avctx->qblur;     /* temporally blur quants */
     x4->params.rc.f_complexity_blur = avctx->complexityblur;
 
-    x4->params.i_frame_reference = avctx->refs;
+    x4->params.i_frame_reference    = avctx->refs;
 
-    x4->params.i_width = avctx->width;
-    x4->params.i_height = avctx->height;
-    x4->params.vui.i_sar_width = avctx->sample_aspect_ratio.num;
-    x4->params.vui.i_sar_height = avctx->sample_aspect_ratio.den;
-    x4->params.i_fps_num = avctx->time_base.den;
-    x4->params.i_fps_den = avctx->time_base.num;
+    x4->params.i_width              = avctx->width;
+    x4->params.i_height             = avctx->height;
+    x4->params.vui.i_sar_width      = avctx->sample_aspect_ratio.num;
+    x4->params.vui.i_sar_height     = avctx->sample_aspect_ratio.den;
+    x4->params.i_fps_num            = avctx->time_base.den;
+    x4->params.i_fps_den            = avctx->time_base.num;
 
-    x4->params.analyse.inter = 0;
-    if(avctx->partitions){
-        if(avctx->partitions & X264_PART_I4X4)
+    x4->params.analyse.inter    = 0;
+    if (avctx->partitions) {
+        if (avctx->partitions & X264_PART_I4X4)
             x4->params.analyse.inter |= X264_ANALYSE_I4x4;
-        if(avctx->partitions & X264_PART_I8X8)
+        if (avctx->partitions & X264_PART_I8X8)
             x4->params.analyse.inter |= X264_ANALYSE_I8x8;
-        if(avctx->partitions & X264_PART_P8X8)
+        if (avctx->partitions & X264_PART_P8X8)
             x4->params.analyse.inter |= X264_ANALYSE_PSUB16x16;
-        if(avctx->partitions & X264_PART_P4X4)
+        if (avctx->partitions & X264_PART_P4X4)
             x4->params.analyse.inter |= X264_ANALYSE_PSUB8x8;
-        if(avctx->partitions & X264_PART_B8X8)
+        if (avctx->partitions & X264_PART_B8X8)
             x4->params.analyse.inter |= X264_ANALYSE_BSUB16x16;
     }
 
-    x4->params.analyse.i_direct_mv_pred = avctx->directpred;
+    x4->params.analyse.i_direct_mv_pred  = avctx->directpred;
 
     x4->params.analyse.b_weighted_bipred = avctx->flags2 & CODEC_FLAG2_WPRED;
 
-    if(avctx->me_method == ME_EPZS)
+    if (avctx->me_method == ME_EPZS)
         x4->params.analyse.i_me_method = X264_ME_DIA;
-    else if(avctx->me_method == ME_HEX)
+    else if (avctx->me_method == ME_HEX)
         x4->params.analyse.i_me_method = X264_ME_HEX;
-    else if(avctx->me_method == ME_UMH)
+    else if (avctx->me_method == ME_UMH)
         x4->params.analyse.i_me_method = X264_ME_UMH;
-    else if(avctx->me_method == ME_FULL)
+    else if (avctx->me_method == ME_FULL)
         x4->params.analyse.i_me_method = X264_ME_ESA;
-    else if(avctx->me_method == ME_TESA)
+    else if (avctx->me_method == ME_TESA)
         x4->params.analyse.i_me_method = X264_ME_TESA;
     else x4->params.analyse.i_me_method = X264_ME_HEX;
 
-    x4->params.analyse.i_me_range = avctx->me_range;
-    x4->params.analyse.i_subpel_refine = avctx->me_subpel_quality;
+    x4->params.analyse.i_me_range         = avctx->me_range;
+    x4->params.analyse.i_subpel_refine    = avctx->me_subpel_quality;
 
-    x4->params.analyse.b_mixed_references =
-        avctx->flags2 & CODEC_FLAG2_MIXED_REFS;
-    x4->params.analyse.b_chroma_me = avctx->me_cmp & FF_CMP_CHROMA;
-    x4->params.analyse.b_transform_8x8 = avctx->flags2 & CODEC_FLAG2_8X8DCT;
-    x4->params.analyse.b_fast_pskip = avctx->flags2 & CODEC_FLAG2_FASTPSKIP;
+    x4->params.analyse.b_mixed_references = avctx->flags2 & CODEC_FLAG2_MIXED_REFS;
+    x4->params.analyse.b_chroma_me        = avctx->me_cmp & FF_CMP_CHROMA;
+    x4->params.analyse.b_transform_8x8    = avctx->flags2 & CODEC_FLAG2_8X8DCT;
+    x4->params.analyse.b_fast_pskip       = avctx->flags2 & CODEC_FLAG2_FASTPSKIP;
 
-    x4->params.analyse.i_trellis = avctx->trellis;
-    x4->params.analyse.i_noise_reduction = avctx->noise_reduction;
+    x4->params.analyse.i_trellis          = avctx->trellis;
+    x4->params.analyse.i_noise_reduction  = avctx->noise_reduction;
 
-    if(avctx->level > 0) x4->params.i_level_idc = avctx->level;
+    if (avctx->level > 0)
+        x4->params.i_level_idc = avctx->level;
 
     x4->params.rc.f_rate_tolerance =
         (float)avctx->bit_rate_tolerance/avctx->bit_rate;
 
-    if((avctx->rc_buffer_size != 0) &&
-            (avctx->rc_initial_buffer_occupancy <= avctx->rc_buffer_size)){
+    if ((avctx->rc_buffer_size != 0) &&
+        (avctx->rc_initial_buffer_occupancy <= avctx->rc_buffer_size)) {
         x4->params.rc.f_vbv_buffer_init =
-            (float)avctx->rc_initial_buffer_occupancy/avctx->rc_buffer_size;
-    }
-    else x4->params.rc.f_vbv_buffer_init = 0.9;
+            (float)avctx->rc_initial_buffer_occupancy / avctx->rc_buffer_size;
+    } else
+        x4->params.rc.f_vbv_buffer_init = 0.9;
 
-    x4->params.rc.f_ip_factor = 1/fabs(avctx->i_quant_factor);
-    x4->params.rc.f_pb_factor = avctx->b_quant_factor;
+    x4->params.rc.f_ip_factor             = 1 / fabs(avctx->i_quant_factor);
+    x4->params.rc.f_pb_factor             = avctx->b_quant_factor;
     x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
 
     x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
-    x4->params.i_log_level = X264_LOG_DEBUG;
+    x4->params.i_log_level    = X264_LOG_DEBUG;
 
-    x4->params.b_aud = avctx->flags2 & CODEC_FLAG2_AUD;
+    x4->params.b_aud          = avctx->flags2 & CODEC_FLAG2_AUD;
 
-    x4->params.i_threads = avctx->thread_count;
+    x4->params.i_threads      = avctx->thread_count;
 
-    x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
+    x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
 
-    if(avctx->flags & CODEC_FLAG_GLOBAL_HEADER){
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER)
         x4->params.b_repeat_headers = 0;
-    }
 
     x4->enc = x264_encoder_open(&x4->params);
-    if(!x4->enc)
+    if (!x4->enc)
         return -1;
 
     avctx->coded_frame = &x4->out_pic;
 
-    if(avctx->flags & CODEC_FLAG_GLOBAL_HEADER){
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
         x264_nal_t *nal;
         int nnal, i, s = 0;
 
         x264_encoder_headers(x4->enc, &nal, &nnal);
 
         /* 5 bytes NAL header + worst case escaping */
-        for(i = 0; i < nnal; i++)
-            s += 5 + nal[i].i_payload * 4 / 3;
+        for (i = 0; i < nnal; i++)
+             s += 5 + nal[i].i_payload * 4 / 3;
 
         avctx->extradata = av_malloc(s);
         avctx->extradata_size = encode_nals(avctx->extradata, s, nal, nnal);
@@ -291,14 +288,14 @@ X264_init(AVCodecContext *avctx)
 }
 
 AVCodec libx264_encoder = {
-    .name = "libx264",
-    .type = CODEC_TYPE_VIDEO,
-    .id = CODEC_ID_H264,
+    .name           = "libx264",
+    .type           = CODEC_TYPE_VIDEO,
+    .id             = CODEC_ID_H264,
     .priv_data_size = sizeof(X264Context),
-    .init = X264_init,
-    .encode = X264_frame,
-    .close = X264_close,
-    .capabilities = CODEC_CAP_DELAY,
-    .pix_fmts = (enum PixelFormat[]) { PIX_FMT_YUV420P, PIX_FMT_NONE },
-    .long_name = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
+    .init           = X264_init,
+    .encode         = X264_frame,
+    .close          = X264_close,
+    .capabilities   = CODEC_CAP_DELAY,
+    .pix_fmts       = (enum PixelFormat[]) { PIX_FMT_YUV420P, PIX_FMT_NONE },
+    .long_name      = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };

From a0244ae347a8d58e88b7cf127e65a51d15d1c157 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 16 Feb 2010 23:04:10 +0000
Subject: [PATCH 071/315] misc. manpage updates, fixes LP: #501729, Debian:
 #570050

Update ffmpeg documentation regarding metadata setting. -title,
-author, -copyright, -track, -album, and -year options have been
dropped in favor of -metadata.
Add an explanation and complete the metadata usage example.

backported revisions r19285, r19287 and r19320 by stefano.


Originally committed as revision 21858 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 doc/ffmpeg-doc.texi | 45 +++++++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/doc/ffmpeg-doc.texi b/doc/ffmpeg-doc.texi
index e3f2dc8727..536e0fa36e 100644
--- a/doc/ffmpeg-doc.texi
+++ b/doc/ffmpeg-doc.texi
@@ -141,8 +141,9 @@ to get the desired audio language.
 
 NOTE: To see the supported input formats, use @code{ffmpeg -formats}.
 
-* You can extract images from a video:
+* You can extract images from a video, or create a video from many images:
 
+For extracting images from a video:
 @example
 ffmpeg -i foo.avi -r 1 -s WxH -f image2 foo-%03d.jpeg
 @end example
@@ -151,15 +152,20 @@ This will extract one video frame per second from the video and will
 output them in files named @file{foo-001.jpeg}, @file{foo-002.jpeg},
 etc. Images will be rescaled to fit the new WxH values.
 
+If you want to extract just a limited number of frames, you can use the
+above command in combination with the -vframes or -t option, or in
+combination with -ss to start extracting from a certain point in time.
+
+For creating a video from many images:
+@example
+ffmpeg -f image2 -i foo-%03d.jpeg -r 12 -s WxH foo.avi
+@end example
+
 The syntax @code{foo-%03d.jpeg} specifies to use a decimal number
 composed of three digits padded with zeroes to express the sequence
 number. It is the same syntax supported by the C printf function, but
 only formats accepting a normal integer are suitable.
 
-If you want to extract just a limited number of frames, you can use the
-above command in combination with the -vframes or -t option, or in
-combination with -ss to start extracting from a certain point in time.
-
 * You can put many streams of the same type in the output:
 
 @example
@@ -276,29 +282,16 @@ The offset is added to the timestamps of the input files.
 Specifying a positive offset means that the corresponding
 streams are delayed by 'offset' seconds.
 
-@item -title @var{string}
-Set the title.
-
 @item -timestamp @var{time}
 Set the timestamp.
 
-@item -author @var{string}
-Set the author.
+@item -metadata @var{key}=@var{value}
+Set a metadata key/value pair.
 
-@item -copyright @var{string}
-Set the copyright.
-
-@item -comment @var{string}
-Set the comment.
-
-@item -album @var{string}
-Set the album.
-
-@item -track @var{number}
-Set the track.
-
-@item -year @var{number}
-Set the year.
+For example, for setting the title in the output file:
+@example
+ffmpeg -i in.avi -metadata title="my title" out.flv
+@end example
 
 @item -v @var{number}
 Set the logging verbosity level.
@@ -687,6 +680,8 @@ Set the number of audio frames to record.
 Set the audio sampling frequency (default = 44100 Hz).
 @item -ab @var{bitrate}
 Set the audio bitrate in bit/s (default = 64k).
+@item -aq @var{q}
+Set the audio quality (codec-specific, VBR).
 @item -ac @var{channels}
 Set the number of audio channels (default = 1).
 @item -an
@@ -728,6 +723,8 @@ Force subtitle codec ('copy' to copy stream).
 Add a new subtitle stream to the current output stream.
 @item -slang @var{code}
 Set the ISO 639 language code (3 letters) of the current subtitle stream.
+@item -sn
+Disable subtitle recording.
 @item -sbsf @var{bitstream_filter}
 Bitstream filters available are "mov2textsub", "text2movsub".
 @example

From 53b90bb25edfd608cb6ae9201ca42052bb54b62f Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Wed, 24 Feb 2010 22:40:10 +0000
Subject: [PATCH 072/315] backport libx264.c from trunk

now compiles with x264 API versions 65 up to 85

patch prepared by darkshikari


Originally committed as revision 22042 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure                                     |   2 +-
 ffpresets/libx264-baseline.ffpreset           |   3 +-
 ffpresets/libx264-default.ffpreset            |   8 +-
 ffpresets/libx264-fastfirstpass.ffpreset      |   7 +-
 ffpresets/libx264-hq.ffpreset                 |   5 +-
 ffpresets/libx264-ipod320.ffpreset            |   3 +-
 ffpresets/libx264-ipod640.ffpreset            |   3 +-
 ffpresets/libx264-lossless_fast.ffpreset      |   3 +-
 ffpresets/libx264-lossless_max.ffpreset       |   3 +-
 ffpresets/libx264-lossless_medium.ffpreset    |   3 +-
 ffpresets/libx264-lossless_slow.ffpreset      |   3 +-
 ffpresets/libx264-lossless_slower.ffpreset    |   3 +-
 ffpresets/libx264-lossless_ultrafast.ffpreset |   2 +-
 ffpresets/libx264-main.ffpreset               |   2 +-
 ffpresets/libx264-max.ffpreset                |   9 +-
 ffpresets/libx264-normal.ffpreset             |   5 +-
 ffpresets/libx264-slowfirstpass.ffpreset      |   5 +-
 libavcodec/avcodec.h                          |  11 ++
 libavcodec/libx264.c                          | 100 +++++++++++++++---
 libavcodec/options.c                          |   2 +
 20 files changed, 144 insertions(+), 38 deletions(-)

diff --git a/configure b/configure
index 3e74ab8311..e96db9e352 100755
--- a/configure
+++ b/configure
@@ -2017,7 +2017,7 @@ enabled libschroedinger && add_cflags $(pkg-config --cflags schroedinger-1.0) &&
 enabled libspeex   && require  libspeex speex/speex.h speex_decoder_init -lspeex
 enabled libtheora  && require  libtheora theora/theora.h theora_info_init -ltheora -logg
 enabled libvorbis  && require  libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg
-enabled libx264    && require  libx264 x264.h x264_encoder_open -lx264 -lm &&
+enabled libx264    && require  libx264 x264.h x264_encoder_encode -lx264 -lm &&
                       { check_cpp_condition x264.h "X264_BUILD >= 65" ||
                         die "ERROR: libx264 version must be >= 0.65."; }
 enabled libxvid    && require  libxvid xvid.h xvid_global -lxvidcore
diff --git a/ffpresets/libx264-baseline.ffpreset b/ffpresets/libx264-baseline.ffpreset
index 47664189b2..a23b746ded 100644
--- a/ffpresets/libx264-baseline.ffpreset
+++ b/ffpresets/libx264-baseline.ffpreset
@@ -1,3 +1,4 @@
 coder=0
 bf=0
-flags2=-wpred-dct8x8
+flags2=-wpred-dct8x8+mbtree
+wpredp=0
diff --git a/ffpresets/libx264-default.ffpreset b/ffpresets/libx264-default.ffpreset
index ec20174c20..a10c633240 100644
--- a/ffpresets/libx264-default.ffpreset
+++ b/ffpresets/libx264-default.ffpreset
@@ -3,7 +3,7 @@ flags=+loop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partb8x8
 me_method=hex
-subq=6
+subq=7
 me_range=16
 g=250
 keyint_min=25
@@ -14,5 +14,9 @@ qcomp=0.6
 qmin=10
 qmax=51
 qdiff=4
+bf=3
+refs=3
 directpred=1
-flags2=+fastpskip
+trellis=1
+flags2=+mixed_refs+wpred+dct8x8+fastpskip+mbtree
+wpredp=2
diff --git a/ffpresets/libx264-fastfirstpass.ffpreset b/ffpresets/libx264-fastfirstpass.ffpreset
index aaad4615d0..7cf6faae41 100644
--- a/ffpresets/libx264-fastfirstpass.ffpreset
+++ b/ffpresets/libx264-fastfirstpass.ffpreset
@@ -3,7 +3,7 @@ flags=+loop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8
 me_method=dia
-subq=1
+subq=2
 me_range=16
 g=250
 keyint_min=25
@@ -14,8 +14,9 @@ qcomp=0.6
 qmin=10
 qmax=51
 qdiff=4
-bf=4
+bf=3
 refs=1
 directpred=3
 trellis=0
-flags2=-bpyramid-wpred-mixed_refs-dct8x8+fastpskip
+flags2=-bpyramid-wpred-mixed_refs-dct8x8+fastpskip+mbtree
+wpredp=2
diff --git a/ffpresets/libx264-hq.ffpreset b/ffpresets/libx264-hq.ffpreset
index cdf67eb067..eafc52e410 100644
--- a/ffpresets/libx264-hq.ffpreset
+++ b/ffpresets/libx264-hq.ffpreset
@@ -14,8 +14,9 @@ qcomp=0.6
 qmin=10
 qmax=51
 qdiff=4
-bf=4
+bf=3
 refs=4
 directpred=3
 trellis=1
-flags2=+bpyramid+wpred+mixed_refs+dct8x8+fastpskip
+flags2=+wpred+mixed_refs+dct8x8+fastpskip+mbtree
+wpredp=2
diff --git a/ffpresets/libx264-ipod320.ffpreset b/ffpresets/libx264-ipod320.ffpreset
index 89441c7deb..b10205025e 100644
--- a/ffpresets/libx264-ipod320.ffpreset
+++ b/ffpresets/libx264-ipod320.ffpreset
@@ -1,6 +1,7 @@
 coder=0
 bf=0
-flags2=-wpred-dct8x8
+flags2=-wpred-dct8x8+mbtree
 level=13
 maxrate=768000
 bufsize=3000000
+wpredp=0
diff --git a/ffpresets/libx264-ipod640.ffpreset b/ffpresets/libx264-ipod640.ffpreset
index fd58e2ad71..aa731dccc5 100644
--- a/ffpresets/libx264-ipod640.ffpreset
+++ b/ffpresets/libx264-ipod640.ffpreset
@@ -1,7 +1,8 @@
 coder=0
 bf=0
 refs=1
-flags2=-wpred-dct8x8
+flags2=-wpred-dct8x8+mbtree
 level=30
 maxrate=10000000
 bufsize=10000000
+wpredp=0
diff --git a/ffpresets/libx264-lossless_fast.ffpreset b/ffpresets/libx264-lossless_fast.ffpreset
index dcf418afda..9bbabf5f7c 100644
--- a/ffpresets/libx264-lossless_fast.ffpreset
+++ b/ffpresets/libx264-lossless_fast.ffpreset
@@ -15,5 +15,6 @@ qmin=10
 qmax=51
 qdiff=4
 directpred=1
-flags2=+fastpskip
+flags2=+fastpskip+mbtree
 cqp=0
+wpredp=0
diff --git a/ffpresets/libx264-lossless_max.ffpreset b/ffpresets/libx264-lossless_max.ffpreset
index b8506c227f..d4ba018b75 100644
--- a/ffpresets/libx264-lossless_max.ffpreset
+++ b/ffpresets/libx264-lossless_max.ffpreset
@@ -16,5 +16,6 @@ qmax=51
 qdiff=4
 refs=16
 directpred=1
-flags2=+mixed_refs+dct8x8+fastpskip
+flags2=+mixed_refs+dct8x8+fastpskip+mbtree
 cqp=0
+wpredp=2
diff --git a/ffpresets/libx264-lossless_medium.ffpreset b/ffpresets/libx264-lossless_medium.ffpreset
index 99fb6b9d07..c4133c2279 100644
--- a/ffpresets/libx264-lossless_medium.ffpreset
+++ b/ffpresets/libx264-lossless_medium.ffpreset
@@ -15,5 +15,6 @@ qmin=10
 qmax=51
 qdiff=4
 directpred=1
-flags2=+fastpskip
+flags2=+fastpskip+mbtree
 cqp=0
+wpredp=2
diff --git a/ffpresets/libx264-lossless_slow.ffpreset b/ffpresets/libx264-lossless_slow.ffpreset
index 2ecb55b07e..1421b95603 100644
--- a/ffpresets/libx264-lossless_slow.ffpreset
+++ b/ffpresets/libx264-lossless_slow.ffpreset
@@ -16,5 +16,6 @@ qmax=51
 qdiff=4
 refs=2
 directpred=1
-flags2=+dct8x8+fastpskip
+flags2=+dct8x8+fastpskip+mbtree
 cqp=0
+wpredp=2
diff --git a/ffpresets/libx264-lossless_slower.ffpreset b/ffpresets/libx264-lossless_slower.ffpreset
index dd499c7afd..942445921e 100644
--- a/ffpresets/libx264-lossless_slower.ffpreset
+++ b/ffpresets/libx264-lossless_slower.ffpreset
@@ -16,5 +16,6 @@ qmax=51
 qdiff=4
 refs=4
 directpred=1
-flags2=+mixed_refs+dct8x8+fastpskip
+flags2=+mixed_refs+dct8x8+fastpskip+mbtree
 cqp=0
+wpredp=2
diff --git a/ffpresets/libx264-lossless_ultrafast.ffpreset b/ffpresets/libx264-lossless_ultrafast.ffpreset
index 1c429f21ff..3c44488e8d 100644
--- a/ffpresets/libx264-lossless_ultrafast.ffpreset
+++ b/ffpresets/libx264-lossless_ultrafast.ffpreset
@@ -15,5 +15,5 @@ qmin=10
 qmax=51
 qdiff=4
 directpred=1
-flags2=+fastpskip
+flags2=+fastpskip+mbtree
 cqp=0
diff --git a/ffpresets/libx264-main.ffpreset b/ffpresets/libx264-main.ffpreset
index d1dc7ddae9..0868c53cff 100644
--- a/ffpresets/libx264-main.ffpreset
+++ b/ffpresets/libx264-main.ffpreset
@@ -1 +1 @@
-flags2=-dct8x8
+flags2=-dct8x8+mbtree
diff --git a/ffpresets/libx264-max.ffpreset b/ffpresets/libx264-max.ffpreset
index afdde53469..4289ab531e 100644
--- a/ffpresets/libx264-max.ffpreset
+++ b/ffpresets/libx264-max.ffpreset
@@ -3,8 +3,8 @@ flags=+loop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4+partb8x8
 me_method=tesa
-subq=9
-me_range=32
+subq=10
+me_range=24
 g=250
 keyint_min=25
 sc_threshold=40
@@ -14,8 +14,9 @@ qcomp=0.6
 qmin=10
 qmax=51
 qdiff=4
-bf=4
+bf=3
 refs=16
 directpred=3
 trellis=2
-flags2=+bpyramid+wpred+mixed_refs+dct8x8-fastpskip
+flags2=+wpred+mixed_refs+dct8x8-fastpskip+mbtree
+wpredp=2
diff --git a/ffpresets/libx264-normal.ffpreset b/ffpresets/libx264-normal.ffpreset
index 99ac2e319a..dd7991bab4 100644
--- a/ffpresets/libx264-normal.ffpreset
+++ b/ffpresets/libx264-normal.ffpreset
@@ -14,8 +14,9 @@ qcomp=0.6
 qmin=10
 qmax=51
 qdiff=4
-bf=4
+bf=3
 refs=2
 directpred=3
 trellis=0
-flags2=+bpyramid+wpred+dct8x8+fastpskip
+flags2=+wpred+dct8x8+fastpskip+mbtree
+wpredp=2
diff --git a/ffpresets/libx264-slowfirstpass.ffpreset b/ffpresets/libx264-slowfirstpass.ffpreset
index 7358d446d2..ba49065c14 100644
--- a/ffpresets/libx264-slowfirstpass.ffpreset
+++ b/ffpresets/libx264-slowfirstpass.ffpreset
@@ -14,8 +14,9 @@ qcomp=0.6
 qmin=10
 qmax=51
 qdiff=4
-bf=4
+bf=3
 refs=1
 directpred=3
 trellis=0
-flags2=+bpyramid+wpred+dct8x8+fastpskip
+flags2=+wpred+dct8x8+fastpskip+mbtree
+wpredp=2
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 835b589ded..065565baf6 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -501,6 +501,7 @@ typedef struct RcOverride{
 #define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
 #define CODEC_FLAG2_NON_LINEAR_QUANT 0x00010000 ///< Use MPEG-2 nonlinear quantizer.
 #define CODEC_FLAG2_BIT_RESERVOIR 0x00020000 ///< Use a bit reservoir when encoding if possible
+#define CODEC_FLAG2_MBTREE        0x00040000 ///< Use macroblock tree ratecontrol (x264 only)
 
 /* Unsupported options :
  *              Syntax Arithmetic coding (SAC)
@@ -2332,6 +2333,16 @@ typedef struct AVCodecContext {
      * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2.
      */
     int ticks_per_frame;
+
+    /**
+     * explicit P-frame weighted prediction analysis method
+     * 0: off
+     * 1: fast blind weighting (one reference duplicate with -1 offset)
+     * 2: smart weighting (full fade detection analysis)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int weighted_p_pred;
 } AVCodecContext;
 
 /**
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index d2d3b270d0..645d45dbbb 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -30,6 +30,8 @@ typedef struct X264Context {
     x264_param_t    params;
     x264_t         *enc;
     x264_picture_t  pic;
+    uint8_t        *sei;
+    int             sei_size;
     AVFrame         out_pic;
 } X264Context;
 
@@ -48,21 +50,66 @@ static void X264_log(void *p, int level, const char *fmt, va_list args)
     av_vlog(p, level_map[level], fmt, args);
 }
 
-
-static int encode_nals(uint8_t *buf, int size, x264_nal_t *nals, int nnal)
+#if X264_BUILD >= 76
+static int encode_nals(AVCodecContext *ctx, uint8_t *buf, int size,
+                       x264_nal_t *nals, int nnal, int skip_sei)
 {
+    X264Context *x4 = ctx->priv_data;
     uint8_t *p = buf;
     int i;
 
+    /* Write the SEI as part of the first frame. */
+    if (x4->sei_size > 0 && nnal > 0) {
+        memcpy(p, x4->sei, x4->sei_size);
+        p += x4->sei_size;
+        x4->sei_size = 0;
+    }
+
+    for (i = 0; i < nnal; i++){
+        /* Don't put the SEI in extradata. */
+        if (skip_sei && nals[i].i_type == NAL_SEI) {
+            x4->sei_size = nals[i].i_payload;
+            x4->sei      = av_malloc(x4->sei_size);
+            memcpy(x4->sei, nals[i].p_payload, nals[i].i_payload);
+            continue;
+        }
+        memcpy(p, nals[i].p_payload, nals[i].i_payload);
+        p += nals[i].i_payload;
+    }
+
+    return p - buf;
+}
+#else
+static int encode_nals(AVCodecContext *ctx, uint8_t *buf, int size, x264_nal_t *nals, int nnal, int skip_sei)
+{
+    X264Context *x4 = ctx->priv_data;
+    uint8_t *p = buf;
+    int i, s;
+
+    /* Write the SEI as part of the first frame. */
+    if (x4->sei_size > 0 && nnal > 0) {
+        memcpy(p, x4->sei, x4->sei_size);
+        p += x4->sei_size;
+        x4->sei_size = 0;
+    }
+
     for (i = 0; i < nnal; i++) {
-        int s = x264_nal_encode(p, &size, 1, nals + i);
-        if(s < 0)
+        /* Don't put the SEI in extradata. */
+        if (skip_sei && nals[i].i_type == NAL_SEI) {
+            x4->sei = av_malloc( 5 + nals[i].i_payload * 4 / 3 );
+            if(x264_nal_encode(x4->sei, &x4->sei_size, 1, nals + i) < 0)
+                return -1;
+            continue;
+        }
+        s = x264_nal_encode(p, &size, 1, nals + i);
+        if (s < 0)
             return -1;
         p += s;
     }
 
     return p - buf;
 }
+#endif
 
 static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
                       int bufsize, void *data)
@@ -86,15 +133,14 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
         x4->pic.i_type = X264_TYPE_AUTO;
     }
 
-    if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL,
-                            &pic_out))
+    if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
         return -1;
 
-    bufsize = encode_nals(buf, bufsize, nal, nnal);
+    bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0);
     if (bufsize < 0)
         return -1;
 
-    /* FIXME: dts */
+    /* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */
     x4->out_pic.pts = pic_out.i_pts;
 
     switch (pic_out.i_type) {
@@ -111,7 +157,11 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
         break;
     }
 
+#if X264_BUILD < 82
     x4->out_pic.key_frame = pic_out.i_type == X264_TYPE_IDR;
+#else
+    x4->out_pic.key_frame = pic_out.b_keyframe;
+#endif
     x4->out_pic.quality   = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
 
     return bufsize;
@@ -122,6 +172,7 @@ static av_cold int X264_close(AVCodecContext *avctx)
     X264Context *x4 = avctx->priv_data;
 
     av_freep(&avctx->extradata);
+    av_free(x4->sei);
 
     if (x4->enc)
         x264_encoder_close(x4->enc);
@@ -133,6 +184,7 @@ static av_cold int X264_init(AVCodecContext *avctx)
 {
     X264Context *x4 = avctx->priv_data;
 
+    x4->sei_size = 0;
     x264_param_default(&x4->params);
 
     x4->params.pf_log               = X264_log;
@@ -164,7 +216,11 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.b_cabac           = avctx->coder_type == FF_CODER_TYPE_AC;
     x4->params.i_bframe_adaptive = avctx->b_frame_strategy;
     x4->params.i_bframe_bias     = avctx->bframebias;
+#if X264_BUILD >= 78
+    x4->params.i_bframe_pyramid  = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? X264_B_PYRAMID_NORMAL : X264_B_PYRAMID_NONE;
+#else
     x4->params.b_bframe_pyramid  = avctx->flags2 & CODEC_FLAG2_BPYRAMID;
+#endif
     avctx->has_b_frames          = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? 2 : !!avctx->max_b_frames;
 
     x4->params.i_keyint_min = avctx->keyint_min;
@@ -191,8 +247,10 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_height             = avctx->height;
     x4->params.vui.i_sar_width      = avctx->sample_aspect_ratio.num;
     x4->params.vui.i_sar_height     = avctx->sample_aspect_ratio.den;
-    x4->params.i_fps_num            = avctx->time_base.den;
-    x4->params.i_fps_den            = avctx->time_base.num;
+#if X264_BUILD >= 81
+    x4->params.i_fps_num = x4->params.i_timebase_den = avctx->time_base.den;
+    x4->params.i_fps_den = x4->params.i_timebase_num = avctx->time_base.num;
+#endif
 
     x4->params.analyse.inter    = 0;
     if (avctx->partitions) {
@@ -211,6 +269,9 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.analyse.i_direct_mv_pred  = avctx->directpred;
 
     x4->params.analyse.b_weighted_bipred = avctx->flags2 & CODEC_FLAG2_WPRED;
+#if X264_BUILD >= 79
+    x4->params.analyse.i_weighted_pred = avctx->weighted_p_pred;
+#endif
 
     if (avctx->me_method == ME_EPZS)
         x4->params.analyse.i_me_method = X264_ME_DIA;
@@ -248,6 +309,9 @@ static av_cold int X264_init(AVCodecContext *avctx)
     } else
         x4->params.rc.f_vbv_buffer_init = 0.9;
 
+#if X264_BUILD >= 69
+    x4->params.rc.b_mb_tree               = !!(avctx->flags2 & CODEC_FLAG2_MBTREE);
+#endif
     x4->params.rc.f_ip_factor             = 1 / fabs(avctx->i_quant_factor);
     x4->params.rc.f_pb_factor             = avctx->b_quant_factor;
     x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
@@ -270,7 +334,18 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     avctx->coded_frame = &x4->out_pic;
 
+#if X264_BUILD >= 76
     if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        x264_nal_t *nal;
+        int nnal, s;
+
+        s = x264_encoder_headers(x4->enc, &nal, &nnal);
+
+        avctx->extradata      = av_malloc(s);
+        avctx->extradata_size = encode_nals(avctx, avctx->extradata, s, nal, nnal, 1);
+    }
+#else
+    if(avctx->flags & CODEC_FLAG_GLOBAL_HEADER){
         x264_nal_t *nal;
         int nnal, i, s = 0;
 
@@ -281,8 +356,9 @@ static av_cold int X264_init(AVCodecContext *avctx)
              s += 5 + nal[i].i_payload * 4 / 3;
 
         avctx->extradata = av_malloc(s);
-        avctx->extradata_size = encode_nals(avctx->extradata, s, nal, nnal);
+        avctx->extradata_size = encode_nals(avctx, avctx->extradata, s, nal, nnal, 1);
     }
+#endif
 
     return 0;
 }
@@ -296,6 +372,6 @@ AVCodec libx264_encoder = {
     .encode         = X264_frame,
     .close          = X264_close,
     .capabilities   = CODEC_CAP_DELAY,
-    .pix_fmts       = (enum PixelFormat[]) { PIX_FMT_YUV420P, PIX_FMT_NONE },
+    .pix_fmts       = (const enum PixelFormat[]) { PIX_FMT_YUV420P, PIX_FMT_NONE },
     .long_name      = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };
diff --git a/libavcodec/options.c b/libavcodec/options.c
index e5a67a4893..1ab78180a3 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -122,6 +122,7 @@ static const AVOption options[]={
 {"b_qfactor", "qp factor between p and b frames", OFFSET(b_quant_factor), FF_OPT_TYPE_FLOAT, 1.25, -FLT_MAX, FLT_MAX, V|E},
 {"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
+{"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
 {"hurry_up", NULL, OFFSET(hurry_up), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
 {"ps", "rtp payload size in bits", OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
@@ -388,6 +389,7 @@ static const AVOption options[]={
 {"request_channels", "set desired number of audio channels", OFFSET(request_channels), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|D},
 {"drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), FF_OPT_TYPE_FLOAT, 1.0, 0.0, 1.0, A|D},
 {"reservoir", "use bit reservoir", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BIT_RESERVOIR, INT_MIN, INT_MAX, A|E, "flags2"},
+{"mbtree", "use macroblock tree ratecontrol (x264 only)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MBTREE, INT_MIN, INT_MAX, V|E, "flags2"},
 {"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"channel_layout", NULL, OFFSET(channel_layout), FF_OPT_TYPE_INT64, DEFAULT, 0, INT64_MAX, A|E|D, "channel_layout"},
 {"request_channel_layout", NULL, OFFSET(request_channel_layout), FF_OPT_TYPE_INT64, DEFAULT, 0, INT64_MAX, A|D, "request_channel_layout"},

From 9d9f1ecfaa1558cb89d9dd5f0efa1415ae11030e Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Fri, 26 Feb 2010 10:56:46 +0000
Subject: [PATCH 073/315] Make sure we don't read over the end. Fixes issue1237.

backport r19322 by michael


Originally committed as revision 22074 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/huffyuv.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index 51acf0570d..dfa06d53a6 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -728,10 +728,17 @@ static void decode_422_bitstream(HYuvContext *s, int count){
 
     count/=2;
 
+    if(count >= (s->gb.size_in_bits - get_bits_count(&s->gb))/(31*4)){
+        for(i=0; i<count && get_bits_count(&s->gb) < s->gb.size_in_bits; i++){
+            READ_2PIX(s->temp[0][2*i  ], s->temp[1][i], 1);
+            READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2);
+        }
+    }else{
     for(i=0; i<count; i++){
         READ_2PIX(s->temp[0][2*i  ], s->temp[1][i], 1);
         READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2);
     }
+    }
 }
 
 static void decode_gray_bitstream(HYuvContext *s, int count){
@@ -739,9 +746,15 @@ static void decode_gray_bitstream(HYuvContext *s, int count){
 
     count/=2;
 
+    if(count >= (s->gb.size_in_bits - get_bits_count(&s->gb))/(31*2)){
+        for(i=0; i<count && get_bits_count(&s->gb) < s->gb.size_in_bits; i++){
+            READ_2PIX(s->temp[0][2*i  ], s->temp[0][2*i+1], 0);
+        }
+    }else{
     for(i=0; i<count; i++){
         READ_2PIX(s->temp[0][2*i  ], s->temp[0][2*i+1], 0);
     }
+    }
 }
 
 #if CONFIG_HUFFYUV_ENCODER || CONFIG_FFVHUFF_ENCODER

From 8e2149d7dfc6af6291d031baa57fa1e184a85a58 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Fri, 26 Feb 2010 14:32:27 +0000
Subject: [PATCH 074/315] fix the remaining ogv segfaults from issue 1240.

First commit:

Make decode_init fail if the huffman tables are invalid and thus init_vlc fails.
Otherwise this will crash during decoding because the vlc tables are NULL.
Partially fixes ogv/smclock.ogv.1.101.ogv from issue 1240.

backport r19355 by reimar

Second commit:

Add extra validation checks to ff_vorbis_len2vlc.
They should not be necessary, but it seems like a reasonable precaution.

r19374 by reimar


Originally committed as revision 22076 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis.c |  5 +++++
 libavcodec/vp3.c    | 29 +++++++++++++++++++----------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index 45daa3c2a9..dbc409f8d7 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -45,6 +45,9 @@ unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n) {   // x^(1/n)
 
 // Generate vlc codes from vorbis huffman code lengths
 
+// the two bits[p] > 32 checks should be redundant, all calling code should
+// already ensure that, but since it allows overwriting the stack it seems
+// reasonable to check redundantly.
 int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num) {
     uint_fast32_t exit_at_level[33]={404,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
@@ -63,6 +66,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num) {
     }
 
     codes[p]=0;
+    if (bits[p] > 32) return 1;
     for(i=0;i<bits[p];++i) {
         exit_at_level[i+1]=1<<i;
     }
@@ -79,6 +83,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num) {
     ++p;
 
     for(;p<num;++p) {
+        if (bits[p] > 32) return 1;
         if (bits[p]==0) continue;
         // find corresponding exit(node which the tree can grow further from)
         for(i=bits[p];i>0;--i) {
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index f30c060e17..429c4f98a4 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -1744,29 +1744,34 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
         for (i = 0; i < 16; i++) {
 
             /* DC histograms */
-            init_vlc(&s->dc_vlc[i], 5, 32,
+            if (init_vlc(&s->dc_vlc[i], 5, 32,
                 &s->huffman_table[i][0][1], 4, 2,
-                &s->huffman_table[i][0][0], 4, 2, 0);
+                &s->huffman_table[i][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 1 AC histograms */
-            init_vlc(&s->ac_vlc_1[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_1[i], 5, 32,
                 &s->huffman_table[i+16][0][1], 4, 2,
-                &s->huffman_table[i+16][0][0], 4, 2, 0);
+                &s->huffman_table[i+16][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 2 AC histograms */
-            init_vlc(&s->ac_vlc_2[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_2[i], 5, 32,
                 &s->huffman_table[i+16*2][0][1], 4, 2,
-                &s->huffman_table[i+16*2][0][0], 4, 2, 0);
+                &s->huffman_table[i+16*2][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 3 AC histograms */
-            init_vlc(&s->ac_vlc_3[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_3[i], 5, 32,
                 &s->huffman_table[i+16*3][0][1], 4, 2,
-                &s->huffman_table[i+16*3][0][0], 4, 2, 0);
+                &s->huffman_table[i+16*3][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 4 AC histograms */
-            init_vlc(&s->ac_vlc_4[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_4[i], 5, 32,
                 &s->huffman_table[i+16*4][0][1], 4, 2,
-                &s->huffman_table[i+16*4][0][0], 4, 2, 0);
+                &s->huffman_table[i+16*4][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
         }
     }
 
@@ -1805,6 +1810,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     }
 
     return 0;
+
+vlc_fail:
+    av_log(avctx, AV_LOG_FATAL, "Invalid huffman table\n");
+    return -1;
 }
 
 /*

From a317cd572205ff3611e4da836f22b93e280221d2 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Fri, 26 Feb 2010 15:49:52 +0000
Subject: [PATCH 075/315] Avoid divisions by 0 in the ASF demuxer if
 packet_size is not valid.

r19330 by reimar


Originally committed as revision 22080 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/asfdec.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 4d61739719..cb695c1a44 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -555,7 +555,9 @@ int ff_asf_get_packet(AVFormatContext *s, ByteIOContext *pb)
     int rsize = 8;
     int c, d, e, off;
 
-    off= (url_ftell(pb) - s->data_offset) % asf->packet_size + 3;
+    off= 32768;
+    if (s->packet_size > 0)
+        off= (url_ftell(pb) - s->data_offset) % asf->packet_size + 3;
 
     c=d=e=-1;
     while(off-- > 0){
@@ -941,7 +943,8 @@ static int64_t asf_read_pts(AVFormatContext *s, int stream_index, int64_t *ppos,
         start_pos[i]= pos;
     }
 
-    pos= (pos+asf->packet_size-1-s->data_offset)/asf->packet_size*asf->packet_size+ s->data_offset;
+    if (s->packet_size > 0)
+        pos= (pos+asf->packet_size-1-s->data_offset)/asf->packet_size*asf->packet_size+ s->data_offset;
     *ppos= pos;
     url_fseek(s->pb, pos, SEEK_SET);
 

From 4fb58ecea89d52374dacf9a88fca263e5269ae13 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 27 Feb 2010 10:01:45 +0000
Subject: [PATCH 076/315] bump LIBAVCODEC_VERSION_MICRO for addition of the
 lock manager API

As discussed with Diego, we'll go for bumping micro in 0.5 and will
consider adding a RELEASEVERSION macro for trunk and 0.6 separately.


Originally committed as revision 22087 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/avcodec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 065565baf6..19ac4ec59e 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -31,7 +31,7 @@
 
 #define LIBAVCODEC_VERSION_MAJOR 52
 #define LIBAVCODEC_VERSION_MINOR 20
-#define LIBAVCODEC_VERSION_MICRO  0
+#define LIBAVCODEC_VERSION_MICRO  1
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \

From bd7e30ea001afdf3965d45651305214357cddfe8 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 27 Feb 2010 10:16:45 +0000
Subject: [PATCH 077/315] add myself to gpg fingerprint list

backport r22089 by siretart


Originally committed as revision 22090 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 70d3de2bee..5ba4d819a4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -329,5 +329,6 @@ Loren Merritt                 ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE
 Michael Niedermayer           9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
 Peter Ross                    A907 E02F A6E5 0CD2 34CD 20D2 6760 79C5 AC40 DD6B
 Reimar Döffinger              C61D 16E5 9E2C D10C 8958 38A4 0899 A2B9 06D4 D9C7
+Reinhard Tartler              9300 5DC2 7E87 6C37 ED7B CA9A 9808 3544 9453 48A4
 Reynaldo H. Verdejo Pinochet  6E27 CD34 170C C78E 4D4F 5F40 C18E 077F 3114 452A
 Sascha Sommer                 38A0 F88B 868E 9D3A 97D4 D6A0 E823 706F 1E07 0D3C

From 4c83c13bc8f9ee727fe4bec471c6ff61ae5e311f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 28 Feb 2010 22:22:22 +0000
Subject: [PATCH 078/315] Mention security fixes in the changelog.

Originally committed as revision 22121 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Changelog b/Changelog
index abcdac3a3e..d0e17138e4 100644
--- a/Changelog
+++ b/Changelog
@@ -13,6 +13,9 @@ version 0.5.1:
 - AMR-NB decoding/encoding, AMR-WB decoding via OpenCORE libraries
 - enable symbol versioning by default for linkers that support it
 - backport av_lockmgr_register(), see doc/APIchanges for details
+- security fixes for:
+  - ASF, Ogg and MOV demuxers
+  - FFv1, H.264, HuffYUV, MLP, MPEG audio and Snow decoders
 
 
From 922c55a09bdf7a70ba2a6f75ba56c1345b6955f2 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 1 Mar 2010 16:22:27 +0000
Subject: [PATCH 079/315] amend release notes for 0.5.1

Originally committed as revision 22129 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/RELEASE b/RELEASE
index 6b02332ca6..9be3a6c301 100644
--- a/RELEASE
+++ b/RELEASE
@@ -37,3 +37,48 @@ The deprecated APIs to be removed are:
 
 If at all possible, do not use the deprecated APIs. All notes on API changes
 should appear in doc/APIchanges.
+
+* 0.5.1
+
+General notes
+-------------
+
+This point release includes some minor updates to make the 0.5 release series
+usable for users that need to retain the existing behavior as closely as
+possible. The changes follow below:
+
+
+Security fixes
+--------------
+
+Various programming errors in container and codec implementations
+may lead to denial of service or the execution of arbitrary code
+if the user is tricked into opening a malformed media file or stream.
+
+Affected and updated have been the implementations of the following
+codecs and container formats:
+
+ - the Vorbis audio codec
+ - the FF Video 1 codec
+ - the MPEG audio codec
+ - the H264 video codec
+ - the MLP codec
+ - the HuffYUV codec
+ - the ASF demuxer
+ - the Ogg container implementation
+ - the MOV container implementation
+
+Symbol Versioning enabled
+-------------------------
+
+The backported symbol versioning change is enabled on platforms that support
+it. This allows users to upgrade from 0.5.1 to the upcoming 0.6 release
+without having to recompile their applications. Please note that distributors
+have to recompile applications against 0.5.1 before upgrading to 0.6.
+
+
+libx264.c backport
+------------------
+
+This release includes a backport to the libx264 wrapper that allows FFmpeg to
+be compiled against newer versions of libx264 up to API version 85.

From 015a7d736233959b52e38cf46ae6804cfa46995c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 1 Mar 2010 17:58:50 +0000
Subject: [PATCH 080/315] Add release managers, merged from trunk.

Originally committed as revision 22133 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5ba4d819a4..6b9baa2f8d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -49,6 +49,7 @@ build system (configure,Makefiles)      Diego Biurrun, Mans Rullgard
 project server                          Diego Biurrun, Mans Rullgard
 mailinglists                            Michael Niedermayer, Baptiste Coudurier
 presets                                 Robert Swain
+release management                      Diego Biurrun, Reinhard Tartler
 
 
 libavutil

From 6d767afb7c73e66b471801c7b2e622eb8359b257 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 1 Mar 2010 18:03:53 +0000
Subject: [PATCH 081/315] If we are using partial release names we might as
 well try to be funny.

Originally committed as revision 22134 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE b/RELEASE
index 9be3a6c301..7d10411f3f 100644
--- a/RELEASE
+++ b/RELEASE
@@ -1,7 +1,7 @@
 Release Notes
 =============
 
-* 0.5 "Bike Shed" March 3, 2009
+* 0.5 "Bike Shed World Domination" March 3, 2009
 
 General notes
 -------------

From eade5150e4cbc8397346748bbe85a7742e17a99c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 2 Mar 2010 14:25:48 +0000
Subject: [PATCH 082/315] Mention licensing-related changes; some whitespace
 adjustments.

Originally committed as revision 22145 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/RELEASE b/RELEASE
index 7d10411f3f..dd18c45229 100644
--- a/RELEASE
+++ b/RELEASE
@@ -38,6 +38,8 @@ The deprecated APIs to be removed are:
 If at all possible, do not use the deprecated APIs. All notes on API changes
 should appear in doc/APIchanges.
 
+
+
 * 0.5.1
 
 General notes
@@ -47,7 +49,6 @@ This point release includes some minor updates to make the 0.5 release series
 usable for users that need to retain the existing behavior as closely as
 possible. The changes follow below:
 
-
 Security fixes
 --------------
 
@@ -76,9 +77,27 @@ it. This allows users to upgrade from 0.5.1 to the upcoming 0.6 release
 without having to recompile their applications. Please note that distributors
 have to recompile applications against 0.5.1 before upgrading to 0.6.
 
-
 libx264.c backport
 ------------------
 
 This release includes a backport to the libx264 wrapper that allows FFmpeg to
 be compiled against newer versions of libx264 up to API version 85.
+
+licensing changes
+-----------------
+
+Previously both libswscale and our AC-3 decoder had GPLed parts. These have
+been replaced by fresh LGPL code. x86 optimizations for libswscale remain GPL,
+but the C code is fully functional. Optimizations for other architectures have
+been relicensed to LGPL.
+
+AMR-NB decoding/encoding and AMR-WB decoding is now possible through the free
+software OpenCORE libraries as an alternative to the non-free libamr libraries.
+
+We found out that libfaac contains non-free parts and is not LGPL as previously
+claimed. We have changed configure to reflect this. You now have to pass the
+--enable-nonfree option if you wish to compile with libfaac support enabled.
+
+Furthermore the non-free bits in libavcodec/fdctref.c have been rewritten. Note
+well that they were only used in a test program and never compiled into any
+FFmpeg library.

From 306eefc49fc06d33fe9d9f950d99e0d88242e204 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 2 Mar 2010 14:43:01 +0000
Subject: [PATCH 083/315] Bump version to 0.5.1.

Originally committed as revision 22146 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 2eb3c4fe4e..4b9fcbec10 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5
+0.5.1

From c46038f6b78133f1d7b23100f41784bb7fdbbcac Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 2 Mar 2010 16:03:06 +0000
Subject: [PATCH 084/315] fix 'seektest' again

backport  r19270 by rbultje:

Remove any reference to ASFContext.packet_size and replace it with
AVFormatContext.packet_size. See "[PATCH] asf*.c/h: use
AVFormatContext->packet_size instead of own copy" thread on ML.

and r19361 by reimar:

Check for packet_length 0, it is already treated as invalid by the padding check,
but that resulted in a confusing/wrong error message.


Originally committed as revision 22147 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/asf.h    |  1 -
 libavformat/asfdec.c | 23 +++++++++++------------
 libavformat/asfenc.c | 14 +++++++-------
 3 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/libavformat/asf.h b/libavformat/asf.h
index a36013bdd2..7e7225cae7 100644
--- a/libavformat/asf.h
+++ b/libavformat/asf.h
@@ -80,7 +80,6 @@ typedef struct {
 
 typedef struct {
     uint32_t seqno;
-    unsigned int packet_size;
     int is_streamed;
     int asfid2avid[128];                 ///< conversion table from asf ID 2 AVStream ID
     ASFStream streams[128];              ///< it's max number and it's not that big
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index cb695c1a44..074266bb1c 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -212,7 +212,7 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap)
             asf->hdr.min_pktsize        = get_le32(pb);
             asf->hdr.max_pktsize        = get_le32(pb);
             asf->hdr.max_bitrate        = get_le32(pb);
-            asf->packet_size = asf->hdr.max_pktsize;
+            s->packet_size = asf->hdr.max_pktsize;
         } else if (!memcmp(&g, &ff_asf_stream_header, sizeof(GUID))) {
             enum CodecType type;
             int type_specific_size, sizeX;
@@ -557,7 +557,7 @@ int ff_asf_get_packet(AVFormatContext *s, ByteIOContext *pb)
 
     off= 32768;
     if (s->packet_size > 0)
-        off= (url_ftell(pb) - s->data_offset) % asf->packet_size + 3;
+        off= (url_ftell(pb) - s->data_offset) % s->packet_size + 3;
 
     c=d=e=-1;
     while(off-- > 0){
@@ -587,12 +587,12 @@ int ff_asf_get_packet(AVFormatContext *s, ByteIOContext *pb)
     asf->packet_flags    = c;
     asf->packet_property = d;
 
-    DO_2BITS(asf->packet_flags >> 5, packet_length, asf->packet_size);
+    DO_2BITS(asf->packet_flags >> 5, packet_length, s->packet_size);
     DO_2BITS(asf->packet_flags >> 1, padsize, 0); // sequence ignored
     DO_2BITS(asf->packet_flags >> 3, padsize, 0); // padding length
 
     //the following checks prevent overflows and infinite loops
-    if(packet_length >= (1U<<29)){
+    if(!packet_length || packet_length >= (1U<<29)){
         av_log(s, AV_LOG_ERROR, "invalid packet_length %d at:%"PRId64"\n", packet_length, url_ftell(pb));
         return -1;
     }
@@ -616,7 +616,7 @@ int ff_asf_get_packet(AVFormatContext *s, ByteIOContext *pb)
     if (packet_length < asf->hdr.min_pktsize)
         padsize += asf->hdr.min_pktsize - packet_length;
     asf->packet_padsize = padsize;
-    dprintf(s, "packet: size=%d padsize=%d  left=%d\n", asf->packet_size, asf->packet_padsize, asf->packet_size_left);
+    dprintf(s, "packet: size=%d padsize=%d  left=%d\n", s->packet_size, asf->packet_padsize, asf->packet_size_left);
     return 0;
 }
 
@@ -788,7 +788,7 @@ int ff_asf_parse_packet(AVFormatContext *s, ByteIOContext *pb, AVPacket *pkt)
 
         /* read data */
         //printf("READ PACKET s:%d  os:%d  o:%d,%d  l:%d   DATA:%p\n",
-        //       asf->packet_size, asf_st->pkt.size, asf->packet_frag_offset,
+        //       s->packet_size, asf_st->pkt.size, asf->packet_frag_offset,
         //       asf_st->frag_offset, asf->packet_frag_size, asf_st->pkt.data);
         asf->packet_size_left -= asf->packet_frag_size;
         if (asf->packet_size_left < 0)
@@ -931,7 +931,6 @@ static int asf_read_close(AVFormatContext *s)
 
 static int64_t asf_read_pts(AVFormatContext *s, int stream_index, int64_t *ppos, int64_t pos_limit)
 {
-    ASFContext *asf = s->priv_data;
     AVPacket pkt1, *pkt = &pkt1;
     ASFStream *asf_st;
     int64_t pts;
@@ -944,7 +943,7 @@ static int64_t asf_read_pts(AVFormatContext *s, int stream_index, int64_t *ppos,
     }
 
     if (s->packet_size > 0)
-        pos= (pos+asf->packet_size-1-s->data_offset)/asf->packet_size*asf->packet_size+ s->data_offset;
+        pos= (pos+s->packet_size-1-s->data_offset)/s->packet_size*s->packet_size+ s->data_offset;
     *ppos= pos;
     url_fseek(s->pb, pos, SEEK_SET);
 
@@ -964,7 +963,7 @@ static int64_t asf_read_pts(AVFormatContext *s, int stream_index, int64_t *ppos,
 
             asf_st= s->streams[i]->priv_data;
 
-//            assert((asf_st->packet_pos - s->data_offset) % asf->packet_size == 0);
+//            assert((asf_st->packet_pos - s->data_offset) % s->packet_size == 0);
             pos= asf_st->packet_pos;
 
             av_add_index_entry(s->streams[i], pos, pts, pkt->size, pos - start_pos[i] + 1, AVINDEX_KEYFRAME);
@@ -1007,10 +1006,10 @@ static void asf_build_simple_index(AVFormatContext *s, int stream_index)
             int pktct =get_le16(s->pb);
             av_log(s, AV_LOG_DEBUG, "pktnum:%d, pktct:%d\n", pktnum, pktct);
 
-            pos=s->data_offset + asf->packet_size*(int64_t)pktnum;
+            pos=s->data_offset + s->packet_size*(int64_t)pktnum;
             index_pts=av_rescale(itime, i, 10000);
 
-            av_add_index_entry(s->streams[stream_index], pos, index_pts, asf->packet_size, 0, AVINDEX_KEYFRAME);
+            av_add_index_entry(s->streams[stream_index], pos, index_pts, s->packet_size, 0, AVINDEX_KEYFRAME);
         }
         asf->index_read= 1;
     }
@@ -1024,7 +1023,7 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int
     int64_t pos;
     int index;
 
-    if (asf->packet_size <= 0)
+    if (s->packet_size <= 0)
         return -1;
 
     /* Try using the protocol's read_seek if available */
diff --git a/libavformat/asfenc.c b/libavformat/asfenc.c
index 8c5aec738f..8b33071e8b 100644
--- a/libavformat/asfenc.c
+++ b/libavformat/asfenc.c
@@ -321,8 +321,8 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
     put_le64(pb, asf->duration); /* duration (in 100ns units) */
     put_le64(pb, PREROLL_TIME); /* start time stamp */
     put_le32(pb, (asf->is_streamed || url_is_streamed(pb)) ? 3 : 2); /* ??? */
-    put_le32(pb, asf->packet_size); /* packet size */
-    put_le32(pb, asf->packet_size); /* packet size */
+    put_le32(pb, s->packet_size); /* packet size */
+    put_le32(pb, s->packet_size); /* packet size */
     put_le32(pb, bit_rate); /* Nominal data rate in bps */
     end_header(pb, hpos);
 
@@ -514,7 +514,7 @@ static int asf_write_header(AVFormatContext *s)
 {
     ASFContext *asf = s->priv_data;
 
-    asf->packet_size = PACKET_SIZE;
+    s->packet_size  = PACKET_SIZE;
     asf->nb_packets = 0;
 
     asf->last_indexed_pts = 0;
@@ -536,7 +536,7 @@ static int asf_write_header(AVFormatContext *s)
     asf->packet_nb_payloads = 0;
     asf->packet_timestamp_start = -1;
     asf->packet_timestamp_end = -1;
-    init_put_byte(&asf->pb, asf->packet_buf, asf->packet_size, 1,
+    init_put_byte(&asf->pb, asf->packet_buf, s->packet_size, 1,
                   NULL, NULL, NULL, NULL);
 
     return 0;
@@ -612,7 +612,7 @@ static void flush_packet(AVFormatContext *s)
     assert(asf->packet_timestamp_end >= asf->packet_timestamp_start);
 
     if (asf->is_streamed) {
-        put_chunk(s, 0x4424, asf->packet_size, 0);
+        put_chunk(s, 0x4424, s->packet_size, 0);
     }
 
     packet_hdr_size = put_payload_parsing_info(
@@ -627,14 +627,14 @@ static void flush_packet(AVFormatContext *s)
     assert(packet_hdr_size <= asf->packet_size_left);
     memset(asf->packet_buf + packet_filled_size, 0, asf->packet_size_left);
 
-    put_buffer(s->pb, asf->packet_buf, asf->packet_size - packet_hdr_size);
+    put_buffer(s->pb, asf->packet_buf, s->packet_size - packet_hdr_size);
 
     put_flush_packet(s->pb);
     asf->nb_packets++;
     asf->packet_nb_payloads = 0;
     asf->packet_timestamp_start = -1;
     asf->packet_timestamp_end = -1;
-    init_put_byte(&asf->pb, asf->packet_buf, asf->packet_size, 1,
+    init_put_byte(&asf->pb, asf->packet_buf, s->packet_size, 1,
                   NULL, NULL, NULL, NULL);
 }
 

From 578c32814c1abcc45bc13531555cab3ae1d015f0 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 3 Mar 2010 08:25:10 +0000
Subject: [PATCH 085/315] Add point release date.

Originally committed as revision 22163 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE b/RELEASE
index dd18c45229..747c3b77f3 100644
--- a/RELEASE
+++ b/RELEASE
@@ -40,7 +40,7 @@ should appear in doc/APIchanges.
 
 
-* 0.5.1
+* 0.5.1 March 2, 2010
 
 General notes
 -------------

From 775aa5f38cacfd217efe835203b786aecf8eed5a Mon Sep 17 00:00:00 2001
From: Andres Mejia <mcitadel@gmail.com>
Date: Sat, 6 Mar 2010 16:57:43 +0000
Subject: [PATCH 086/315] Add Hurd to OS list and disable dv1394 in the Hurd
 case. patch by Andres Mejia, mcitadel gmail com

backport r18938 by diego


Originally committed as revision 22237 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/configure b/configure
index e96db9e352..ee5a2a5417 100755
--- a/configure
+++ b/configure
@@ -1659,6 +1659,9 @@ case $target_os in
         ;;
     gnu/kfreebsd)
         ;;
+    gnu)
+        disable dv1394
+        ;;
 
     *)
         die "Unknown OS '$target_os'."

From fe95afe1e2aee04d6bb85ef4bbaa3333e9168461 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 6 Mar 2010 19:40:37 +0000
Subject: [PATCH 087/315] libswscale is no longer GPL; update help comment
 accordingly.

Originally committed as revision 22250 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index ee5a2a5417..bf5adc316a 100755
--- a/configure
+++ b/configure
@@ -82,7 +82,7 @@ show_help(){
   echo "  --disable-ffplay         disable ffplay build"
   echo "  --disable-ffserver       disable ffserver build"
   echo "  --enable-postproc        enable GPLed postprocessing support [no]"
-  echo "  --enable-swscale         enable GPLed software scaler support [no]"
+  echo "  --enable-swscale         enable software scaler support [no]"
   echo "  --enable-avfilter        video filter support (replaces vhook) [no]"
   echo "  --enable-avfilter-lavf   video filters dependent on avformat [no]"
   echo "  --disable-vhook          disable video hooking support"

From 461243731d0ab7532fb4979414cb26afabc03f5e Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 6 Mar 2010 19:50:56 +0000
Subject: [PATCH 088/315] Mention LGPL libswscale in the Changelog.

Originally committed as revision 22253 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index d0e17138e4..ce4f0e46ff 100644
--- a/Changelog
+++ b/Changelog
@@ -6,6 +6,7 @@ version 0.5.1:
 
 - build system updates
 - documentation updates
+- libswscale now is LGPL except for x86 optimizations
 - fix for GPL code in libswscale that was erroneously activated
 - AltiVec code in libswscale is now LGPL
 - remaining GPL parts in AC-3 decoder converted to LGPL

From 557e065d5fb738551057348e4c7eb0feba81a3f8 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Wed, 10 Mar 2010 20:55:07 +0000
Subject: [PATCH 089/315] Fix compilation on powerpc with --disable-altivec

in case altivec is disabled, even compilation of code using altivec
keywords or asm must be avoided.

backport r30869 from mplayer repo by siretart


Originally committed as revision 22436 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libswscale/swscale.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 0c089c810a..0bdebc59e7 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -960,7 +960,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 #endif
 
 #if ARCH_PPC
-#if HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT
+#if HAVE_ALTIVEC
 #define COMPILE_ALTIVEC
 #endif
 #endif //ARCH_PPC
@@ -1649,7 +1649,7 @@ static SwsFunc getSwsFunc(int flags){
         return swScale_C;
 
 #else
-#if ARCH_PPC
+#if ARCH_PPC && COMPILE_ALTIVEC
     if (flags & SWS_CPU_CAPS_ALTIVEC)
         return swScale_altivec;
     else

From 7fd4cbb51973ccb061736e177584201a178f99ed Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Fri, 12 Mar 2010 20:35:04 +0000
Subject: [PATCH 090/315] fix compilation issue on powerpc

Unlike the ARCH_ macros, COMPILE_ALTIVEC is defined without a value, so it
must be tested with 'defined' instead of being used directly in an #if expression.


Originally committed as revision 22488 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libswscale/swscale.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 0bdebc59e7..dd8f27adbc 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1649,7 +1649,7 @@ static SwsFunc getSwsFunc(int flags){
         return swScale_C;
 
 #else
-#if ARCH_PPC && COMPILE_ALTIVEC
+#if ARCH_PPC && defined COMPILE_ALTIVEC
     if (flags & SWS_CPU_CAPS_ALTIVEC)
         return swScale_altivec;
     else

From 96ca078b22ad8bdb34444f4e56a79327faeaea65 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Wed, 24 Mar 2010 19:35:30 +0000
Subject: [PATCH 091/315] Check validity of channels & samplerate. This may be
 security relevant. Based on 2 patches by chrome.

backport r19975 by michael


Originally committed as revision 22658 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/vorbis_dec.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 6cfdf48ee7..98756572bf 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -902,8 +902,16 @@ static int vorbis_parse_id_hdr(vorbis_context *vc){
     }
 
     vc->version=get_bits_long(gb, 32);    //FIXME check 0
-    vc->audio_channels=get_bits(gb, 8);   //FIXME check >0
-    vc->audio_samplerate=get_bits_long(gb, 32);   //FIXME check >0
+    vc->audio_channels=get_bits(gb, 8);
+    if(vc->audio_channels <= 0){
+        av_log(vc->avccontext, AV_LOG_ERROR, "Invalid number of channels\n");
+        return -1;
+    }
+    vc->audio_samplerate=get_bits_long(gb, 32);
+    if(vc->audio_samplerate <= 0){
+        av_log(vc->avccontext, AV_LOG_ERROR, "Invalid samplerate\n");
+        return -1;
+    }
     vc->bitrate_maximum=get_bits_long(gb, 32);
     vc->bitrate_nominal=get_bits_long(gb, 32);
     vc->bitrate_minimum=get_bits_long(gb, 32);

From 2fcb56dab925770028ffba4df0392766e287f810 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 24 May 2010 21:41:51 +0000
Subject: [PATCH 092/315] Update Changelog for 0.5.2 release.

Originally committed as revision 23298 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Changelog b/Changelog
index ce4f0e46ff..64d843d5fb 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,14 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.2:
+
+- Hurd support
+- PowerPC without Altivec compilation issues
+- validate channels and samplerate in the Vorbis decoder
+
+
+
 version 0.5.1:
 
 - build system updates

From ee20f19b203e1e8982136c9246b5fdb391157350 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 24 May 2010 21:55:01 +0000
Subject: [PATCH 093/315] Bump version number for 0.5.2 release.

Originally committed as revision 23299 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 4b9fcbec10..cb0c939a93 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.1
+0.5.2

From 021054a1962769c1d231bfa9e9178961c5b75e30 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 24 May 2010 21:58:47 +0000
Subject: [PATCH 094/315] release notes for 0.5.2

Originally committed as revision 23300 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/RELEASE b/RELEASE
index 747c3b77f3..641d117c7b 100644
--- a/RELEASE
+++ b/RELEASE
@@ -101,3 +101,14 @@ claimed. We have changed configure to reflect this. You now have to pass the
 Furthermore the non-free bits in libavcodec/fdctref.c have been rewritten. Note
 well that they were only used in a test program and never compiled into any
 FFmpeg library.
+
+
+
+* 0.5.2 May 25, 2010
+
+General notes
+-------------
+
+This is a maintenance only release that addresses a small number of security
+and portability issues. Distributors and system integrators are encouraged
+to update and share their patches against this branch.

From fc038df32e43c94a23933f8e8ccef09c11c97cd7 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 31 May 2010 13:06:51 +0000
Subject: [PATCH 095/315] configure: improve temp file creation and cleanup

backport r17752 by mru


Originally committed as revision 23393 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 configure | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/configure b/configure
index bf5adc316a..745d8fdd67 100755
--- a/configure
+++ b/configure
@@ -261,7 +261,6 @@ Include the log file "$logfile" produced by configure as this will help
 solving the problem.
 EOF
     fi
-    rm -f $TMPC $TMPE $TMPH $TMPO $TMPS $TMPSH
     exit 1
 }
 
@@ -1358,13 +1357,36 @@ esac
 : ${TMPDIR:=$TMP}
 : ${TMPDIR:=/tmp}
 
-TMPC="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.c"
-TMPE="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}${EXESUF}"
-TMPH="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.h"
-TMPO="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.o"
-TMPS="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.S"
-TMPV="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.ver"
-TMPSH="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.sh"
+
+if ! check_cmd type mktemp; then
+    # simple replacement for missing mktemp
+    # NOT SAFE FOR GENERAL USE
+    mktemp(){
+        echo "${2%XXX*}.${HOSTNAME}.${UID}.$$"
+    }
+fi
+
+
+tmpfile(){
+    tmp=$(mktemp -u "${TMPDIR}/ffconf.XXXXXXXX")$2 &&
+        (set -C; exec > $tmp) 2>/dev/null ||
+        die "Unable to create temoporary file in $TMPDIR."
+    append TMPFILES $tmp
+    eval $1=$tmp
+}
+
+trap 'rm -f -- $TMPFILES' EXIT
+trap exit HUP INT TERM
+
+tmpfile TMPC  .c
+tmpfile TMPE  $EXESUF
+tmpfile TMPH  .h
+tmpfile TMPO  .o
+tmpfile TMPS  .S
+tmpfile TMPV  .ver
+tmpfile TMPSH .sh
+
+unset -f mktemp
 
 # make sure we can execute files in $TMPDIR
 cat > $TMPSH 2>> $logfile <<EOF
@@ -1378,7 +1400,6 @@ variable to another directory and make sure that it is not mounted noexec.
 EOF
     die "Sanity test failed."
 fi
-rm $TMPSH
 
 if   $cc --version 2>/dev/null | grep -qi gcc; then
     cc_type=gcc
@@ -2522,8 +2543,6 @@ cmp -s $TMPH config.h &&
     echo "config.h is unchanged" ||
     mv -f $TMPH config.h
 
-rm -f $TMPC $TMPE $TMPH $TMPO $TMPS $TMPSH
-
 # build tree in object directory if source path is different from current one
 if enabled source_path_used; then
     DIRS="\

From 84e6629de31cad467b7d9378a1faf2358b4f15b4 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 22 Jul 2010 11:58:26 +0000
Subject: [PATCH 096/315] aviobuf: Do short seeks forward by reading and
 skipping data instead of a proper seek

This improves performance on e.g. seekable http.


backport r24280 by mstorsjo


Originally committed as revision 24428 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/aviobuf.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 93efd65d0b..9650ecfc9d 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -27,6 +27,13 @@
 
 #define IO_BUFFER_SIZE 32768
 
+/**
+ * Do seeks within this distance ahead of the current buffer by skipping
+ * data instead of calling the protocol seek function, for seekable
+ * protocols.
+ */
+#define SHORT_SEEK_THRESHOLD 4096
+
 static void fill_buffer(ByteIOContext *s);
 
 int init_put_byte(ByteIOContext *s,
@@ -151,8 +158,10 @@ int64_t url_fseek(ByteIOContext *s, int64_t offset, int whence)
         offset1 >= 0 && offset1 < (s->buf_end - s->buffer)) {
         /* can do the seek inside the buffer */
         s->buf_ptr = s->buffer + offset1;
-    } else if(s->is_streamed && !s->write_flag &&
-              offset1 >= 0 && offset1 < (s->buf_end - s->buffer) + (1<<16)){
+    } else if ((s->is_streamed ||
+               offset1 <= s->buf_end + SHORT_SEEK_THRESHOLD - s->buffer) &&
+               !s->write_flag && offset1 >= 0 &&
+              (whence != SEEK_END || force)) {
         while(s->pos < offset && !s->eof_reached)
             fill_buffer(s);
         if (s->eof_reached)

From 2dea9a1266c302912572c182739632af3c00d970 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 3 Oct 2010 14:50:04 +0000
Subject: [PATCH 097/315] unbreak compilation and finish backport r24280 by
 mstorsjo

Originally committed as revision 25324 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavformat/aviobuf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 9650ecfc9d..3143a23a61 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -160,8 +160,7 @@ int64_t url_fseek(ByteIOContext *s, int64_t offset, int whence)
         s->buf_ptr = s->buffer + offset1;
     } else if ((s->is_streamed ||
                offset1 <= s->buf_end + SHORT_SEEK_THRESHOLD - s->buffer) &&
-               !s->write_flag && offset1 >= 0 &&
-              (whence != SEEK_END || force)) {
+               !s->write_flag && offset1 >= 0) {
         while(s->pos < offset && !s->eof_reached)
             fill_buffer(s);
         if (s->eof_reached)

From 2f504d7a90605b77d1a9ac43a8d1efa208e0f515 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 3 Oct 2010 14:51:50 +0000
Subject: [PATCH 098/315] Fix several security issues in flicvideo.c This fixes
 CVE-2010-3429

backport r25223 by michael


Originally committed as revision 25325 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 libavcodec/flicvideo.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c
index 2261c40524..95255f99b3 100644
--- a/libavcodec/flicvideo.c
+++ b/libavcodec/flicvideo.c
@@ -160,7 +160,7 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
     int pixel_skip;
     int pixel_countdown;
     unsigned char *pixels;
-    int pixel_limit;
+    unsigned int pixel_limit;
 
     s->frame.reference = 1;
     s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
@@ -254,10 +254,13 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                     av_log(avctx, AV_LOG_ERROR, "Undefined opcode (%x) in DELTA_FLI\n", line_packets);
                 } else if ((line_packets & 0xC000) == 0x8000) {
                     // "last byte" opcode
-                    pixels[y_ptr + s->frame.linesize[0] - 1] = line_packets & 0xff;
+                    pixel_ptr= y_ptr + s->frame.linesize[0] - 1;
+                    CHECK_PIXEL_PTR(0);
+                    pixels[pixel_ptr] = line_packets & 0xff;
                 } else {
                     compressed_lines--;
                     pixel_ptr = y_ptr;
+                    CHECK_PIXEL_PTR(0);
                     pixel_countdown = s->avctx->width;
                     for (i = 0; i < line_packets; i++) {
                         /* account for the skip bytes */
@@ -269,7 +272,7 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                             byte_run = -byte_run;
                             palette_idx1 = buf[stream_ptr++];
                             palette_idx2 = buf[stream_ptr++];
-                            CHECK_PIXEL_PTR(byte_run);
+                            CHECK_PIXEL_PTR(byte_run * 2);
                             for (j = 0; j < byte_run; j++, pixel_countdown -= 2) {
                                 pixels[pixel_ptr++] = palette_idx1;
                                 pixels[pixel_ptr++] = palette_idx2;
@@ -299,6 +302,7 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
             stream_ptr += 2;
             while (compressed_lines > 0) {
                 pixel_ptr = y_ptr;
+                CHECK_PIXEL_PTR(0);
                 pixel_countdown = s->avctx->width;
                 line_packets = buf[stream_ptr++];
                 if (line_packets > 0) {
@@ -454,7 +458,7 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
     int pixel_countdown;
     unsigned char *pixels;
     int pixel;
-    int pixel_limit;
+    unsigned int pixel_limit;
 
     s->frame.reference = 1;
     s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
@@ -504,6 +508,7 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                 } else {
                     compressed_lines--;
                     pixel_ptr = y_ptr;
+                    CHECK_PIXEL_PTR(0);
                     pixel_countdown = s->avctx->width;
                     for (i = 0; i < line_packets; i++) {
                         /* account for the skip bytes */
@@ -515,13 +520,13 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                             byte_run = -byte_run;
                             pixel    = AV_RL16(&buf[stream_ptr]);
                             stream_ptr += 2;
-                            CHECK_PIXEL_PTR(byte_run);
+                            CHECK_PIXEL_PTR(2 * byte_run);
                             for (j = 0; j < byte_run; j++, pixel_countdown -= 2) {
                                 *((signed short*)(&pixels[pixel_ptr])) = pixel;
                                 pixel_ptr += 2;
                             }
                         } else {
-                            CHECK_PIXEL_PTR(byte_run);
+                            CHECK_PIXEL_PTR(2 * byte_run);
                             for (j = 0; j < byte_run; j++, pixel_countdown--) {
                                 *((signed short*)(&pixels[pixel_ptr])) = AV_RL16(&buf[stream_ptr]);
                                 stream_ptr += 2;
@@ -612,7 +617,7 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                     if (byte_run > 0) {
                         pixel    = AV_RL16(&buf[stream_ptr]);
                         stream_ptr += 2;
-                        CHECK_PIXEL_PTR(byte_run);
+                        CHECK_PIXEL_PTR(2 * byte_run);
                         for (j = 0; j < byte_run; j++) {
                             *((signed short*)(&pixels[pixel_ptr])) = pixel;
                             pixel_ptr += 2;
@@ -623,7 +628,7 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                         }
                     } else {  /* copy pixels if byte_run < 0 */
                         byte_run = -byte_run;
-                        CHECK_PIXEL_PTR(byte_run);
+                        CHECK_PIXEL_PTR(2 * byte_run);
                         for (j = 0; j < byte_run; j++) {
                             *((signed short*)(&pixels[pixel_ptr])) = AV_RL16(&buf[stream_ptr]);
                             stream_ptr += 2;

From 69e8b43812bf994f843c74cea26650386cd6e589 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 18 Oct 2010 19:38:02 +0000
Subject: [PATCH 099/315] Update Changelog for 0.5.3 release.

Originally committed as revision 25521 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 Changelog | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Changelog b/Changelog
index 64d843d5fb..ef46c7f4d6 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,15 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.3:
+
+- build system improvements
+- performance fix for seekable HTTP
+- fix several potentially exploitable issues in the FLIC decoder
+  (addresses CVE-2010-3429)
+
+
+
 version 0.5.2:
 
 - Hurd support

From 7d10059aeb4e9a35e18acad80b035a085b1ecfb2 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 18 Oct 2010 19:40:09 +0000
Subject: [PATCH 100/315] Bump version number for 0.5.3 release.

Originally committed as revision 25522 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index cb0c939a93..be14282b7f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.2
+0.5.3

From c9864adf34fcb6b5f022c93dbf1ce7722127f1a4 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Mon, 18 Oct 2010 19:43:55 +0000
Subject: [PATCH 101/315] release notes for 0.5.3

Originally committed as revision 25523 to svn://svn.ffmpeg.org/ffmpeg/branches/0.5
---
 RELEASE | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/RELEASE b/RELEASE
index 641d117c7b..cdc923b907 100644
--- a/RELEASE
+++ b/RELEASE
@@ -112,3 +112,15 @@ General notes
 This is a maintenance only release that addresses a small number of security
 and portability issues. Distributors and system integrators are encouraged
 to update and share their patches against this branch.
+
+
+
+* 0.5.3 Oct 18, 2010
+
+General notes
+-------------
+
+This is (again) another maintenance only release that addresses a fix
+for seekable HTTP and an exploitable bug in the FLIC decoder
+(cf. CVE-2010-3429 for details). Distributors and system integrators are
+encouraged to update and share their patches against this branch.

From 48b086b0efa40799ace96bcec010b6b72a9490d6 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 22 Apr 2009 01:54:05 +0000
Subject: [PATCH 102/315] Update safety check as the maximum pixel size is no
 longer 4. New max size is 16bit * 4 samples (RGBA).

Originally committed as revision 18655 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 445f0a8b666a34e6402f6ae96c6804c8bc024baa)

Addresses: CVE-2010-3908
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index f628774cd7..acfafa1344 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -189,7 +189,7 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
 }
 
 int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h){
-    if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/4)
+    if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8)
         return 0;
 
     av_log(av_log_ctx, AV_LOG_ERROR, "picture size invalid (%ux%u)\n", w, h);

From 44511b17cbbb524602c91a198e7314fa57a7062a Mon Sep 17 00:00:00 2001
From: Kostya Shishkov <kostya.shishkov@gmail.com>
Date: Sun, 22 Nov 2009 07:48:35 +0000
Subject: [PATCH 103/315] Update dimensions in AVCodecContext when RV3/4 frame
 dimensions change

Originally committed as revision 20572 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit ec10d2d53999f6edf7d7b5ac88df263eccfb1fb0)

Fixes heap corruption crashes

Addresses: CVE-2011-0722
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/rv34.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index c2277076a5..9fe3919ca1 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1247,8 +1247,8 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
         if(s->width != r->si.width || s->height != r->si.height){
             av_log(s->avctx, AV_LOG_DEBUG, "Changing dimensions to %dx%d\n", r->si.width,r->si.height);
             MPV_common_end(s);
-            s->width  = r->si.width;
-            s->height = r->si.height;
+            s->width  = s->avctx->width  = r->si.width;
+            s->height = s->avctx->height = r->si.height;
             if(MPV_common_init(s) < 0)
                 return -1;
             r->intra_types_hist = av_realloc(r->intra_types_hist, s->b4_stride * 4 * 2 * sizeof(*r->intra_types_hist));

From 9109a58867d75299281936bc6ed3dcdf50c70722 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-ffmpeg@jannau.net>
Date: Mon, 17 Jan 2011 15:49:11 +0100
Subject: [PATCH 104/315] convert svn:ignore properties to .gitignore files

Signed-off-by: Janne Grunau <janne-ffmpeg@jannau.net>
(cherry picked from commit 348b8218f7a59374355c966dbe3b851a7275f952)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 .gitignore                  | 15 +++++++++++++++
 doc/.gitignore              |  3 +++
 ffpresets/.gitignore        |  1 +
 libavcodec/.gitignore       |  8 ++++++++
 libavcodec/alpha/.gitignore |  3 +++
 libavcodec/arm/.gitignore   |  3 +++
 libavcodec/avr32/.gitignore |  3 +++
 libavcodec/bfin/.gitignore  |  3 +++
 libavcodec/mips/.gitignore  |  3 +++
 libavcodec/mlib/.gitignore  |  3 +++
 libavcodec/ppc/.gitignore   |  3 +++
 libavcodec/ps2/.gitignore   |  3 +++
 libavcodec/sh4/.gitignore   |  3 +++
 libavcodec/sparc/.gitignore |  3 +++
 libavcodec/x86/.gitignore   |  5 +++++
 libavcore/.gitignore        |  5 +++++
 libavdevice/.gitignore      |  8 ++++++++
 libavfilter/.gitignore      |  8 ++++++++
 libavfilter/x86/.gitignore  |  1 +
 libavformat/.gitignore      |  8 ++++++++
 libavutil/.gitignore        |  9 +++++++++
 libavutil/arm/.gitignore    |  3 +++
 libavutil/avr32/.gitignore  |  2 ++
 libavutil/bfin/.gitignore   |  3 +++
 libavutil/mips/.gitignore   |  2 ++
 libavutil/ppc/.gitignore    |  3 +++
 libavutil/sh4/.gitignore    |  3 +++
 libavutil/tomi/.gitignore   |  3 +++
 libavutil/x86/.gitignore    |  3 +++
 libpostproc/.gitignore      |  6 ++++++
 libswscale/.gitignore       |  7 +++++++
 libswscale/bfin/.gitignore  |  2 ++
 libswscale/mlib/.gitignore  |  2 ++
 libswscale/ppc/.gitignore   |  2 ++
 libswscale/sparc/.gitignore |  2 ++
 libswscale/x86/.gitignore   |  2 ++
 tests/.gitignore            | 12 ++++++++++++
 tools/.gitignore            | 18 ++++++++++++++++++
 38 files changed, 176 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 doc/.gitignore
 create mode 100644 ffpresets/.gitignore
 create mode 100644 libavcodec/.gitignore
 create mode 100644 libavcodec/alpha/.gitignore
 create mode 100644 libavcodec/arm/.gitignore
 create mode 100644 libavcodec/avr32/.gitignore
 create mode 100644 libavcodec/bfin/.gitignore
 create mode 100644 libavcodec/mips/.gitignore
 create mode 100644 libavcodec/mlib/.gitignore
 create mode 100644 libavcodec/ppc/.gitignore
 create mode 100644 libavcodec/ps2/.gitignore
 create mode 100644 libavcodec/sh4/.gitignore
 create mode 100644 libavcodec/sparc/.gitignore
 create mode 100644 libavcodec/x86/.gitignore
 create mode 100644 libavcore/.gitignore
 create mode 100644 libavdevice/.gitignore
 create mode 100644 libavfilter/.gitignore
 create mode 100644 libavfilter/x86/.gitignore
 create mode 100644 libavformat/.gitignore
 create mode 100644 libavutil/.gitignore
 create mode 100644 libavutil/arm/.gitignore
 create mode 100644 libavutil/avr32/.gitignore
 create mode 100644 libavutil/bfin/.gitignore
 create mode 100644 libavutil/mips/.gitignore
 create mode 100644 libavutil/ppc/.gitignore
 create mode 100644 libavutil/sh4/.gitignore
 create mode 100644 libavutil/tomi/.gitignore
 create mode 100644 libavutil/x86/.gitignore
 create mode 100644 libpostproc/.gitignore
 create mode 100644 libswscale/.gitignore
 create mode 100644 libswscale/bfin/.gitignore
 create mode 100644 libswscale/mlib/.gitignore
 create mode 100644 libswscale/ppc/.gitignore
 create mode 100644 libswscale/sparc/.gitignore
 create mode 100644 libswscale/x86/.gitignore
 create mode 100644 tests/.gitignore
 create mode 100644 tools/.gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..64dc57bec0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+.config
+.version
+*.o
+*.d
+*.exe
+*.ho
+*-test
+*_g
+config.*
+doxy
+ffmpeg
+ffplay
+ffprobe
+ffserver
+version.h
diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 0000000000..aeaee91f95
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1,3 @@
+*.1
+*.html
+*.pod
diff --git a/ffpresets/.gitignore b/ffpresets/.gitignore
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/ffpresets/.gitignore
@@ -0,0 +1 @@
+
diff --git a/libavcodec/.gitignore b/libavcodec/.gitignore
new file mode 100644
index 0000000000..40a1c87eec
--- /dev/null
+++ b/libavcodec/.gitignore
@@ -0,0 +1,8 @@
+*.o
+*.d
+*.exe
+*.ho
+*-example
+*-test
+libavcodec*
+
diff --git a/libavcodec/alpha/.gitignore b/libavcodec/alpha/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/alpha/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/arm/.gitignore b/libavcodec/arm/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/arm/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/avr32/.gitignore b/libavcodec/avr32/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/avr32/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/bfin/.gitignore b/libavcodec/bfin/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/bfin/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/mips/.gitignore b/libavcodec/mips/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/mips/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/mlib/.gitignore b/libavcodec/mlib/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/mlib/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/ppc/.gitignore b/libavcodec/ppc/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/ppc/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/ps2/.gitignore b/libavcodec/ps2/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/ps2/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/sh4/.gitignore b/libavcodec/sh4/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/sh4/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/sparc/.gitignore b/libavcodec/sparc/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavcodec/sparc/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavcodec/x86/.gitignore b/libavcodec/x86/.gitignore
new file mode 100644
index 0000000000..eb249fb180
--- /dev/null
+++ b/libavcodec/x86/.gitignore
@@ -0,0 +1,5 @@
+*.d
+*.exe
+*.ho
+*-test
+
diff --git a/libavcore/.gitignore b/libavcore/.gitignore
new file mode 100644
index 0000000000..81b575239e
--- /dev/null
+++ b/libavcore/.gitignore
@@ -0,0 +1,5 @@
+*.o
+*.d
+*.ho
+libavcore*
+
diff --git a/libavdevice/.gitignore b/libavdevice/.gitignore
new file mode 100644
index 0000000000..5d53ca790f
--- /dev/null
+++ b/libavdevice/.gitignore
@@ -0,0 +1,8 @@
+*.o
+*.d
+*.exe
+*.ho
+*-example
+*-test
+libavdevice*
+
diff --git a/libavfilter/.gitignore b/libavfilter/.gitignore
new file mode 100644
index 0000000000..f6d3cd0c1a
--- /dev/null
+++ b/libavfilter/.gitignore
@@ -0,0 +1,8 @@
+*.o
+*.d
+*.exe
+*.ho
+*-example
+*-test
+libavfilter*
+
diff --git a/libavfilter/x86/.gitignore b/libavfilter/x86/.gitignore
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/libavfilter/x86/.gitignore
@@ -0,0 +1 @@
+
diff --git a/libavformat/.gitignore b/libavformat/.gitignore
new file mode 100644
index 0000000000..50e4684f77
--- /dev/null
+++ b/libavformat/.gitignore
@@ -0,0 +1,8 @@
+*.o
+*.d
+*.exe
+*.ho
+*-example
+*-test
+libavformat*
+
diff --git a/libavutil/.gitignore b/libavutil/.gitignore
new file mode 100644
index 0000000000..3e21b95006
--- /dev/null
+++ b/libavutil/.gitignore
@@ -0,0 +1,9 @@
+*.d
+*.o
+*.exe
+*.ho
+*-example
+*-test
+libavutil*
+avconfig.h
+
diff --git a/libavutil/arm/.gitignore b/libavutil/arm/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavutil/arm/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavutil/avr32/.gitignore b/libavutil/avr32/.gitignore
new file mode 100644
index 0000000000..2a6b22b026
--- /dev/null
+++ b/libavutil/avr32/.gitignore
@@ -0,0 +1,2 @@
+*.ho
+
diff --git a/libavutil/bfin/.gitignore b/libavutil/bfin/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavutil/bfin/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavutil/mips/.gitignore b/libavutil/mips/.gitignore
new file mode 100644
index 0000000000..2a6b22b026
--- /dev/null
+++ b/libavutil/mips/.gitignore
@@ -0,0 +1,2 @@
+*.ho
+
diff --git a/libavutil/ppc/.gitignore b/libavutil/ppc/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavutil/ppc/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavutil/sh4/.gitignore b/libavutil/sh4/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavutil/sh4/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavutil/tomi/.gitignore b/libavutil/tomi/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavutil/tomi/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libavutil/x86/.gitignore b/libavutil/x86/.gitignore
new file mode 100644
index 0000000000..854570fa24
--- /dev/null
+++ b/libavutil/x86/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.ho
+
diff --git a/libpostproc/.gitignore b/libpostproc/.gitignore
new file mode 100644
index 0000000000..f0eb9a0fac
--- /dev/null
+++ b/libpostproc/.gitignore
@@ -0,0 +1,6 @@
+*.d
+*.exe
+*.ho
+*-example
+*-test
+libpostproc*
diff --git a/libswscale/.gitignore b/libswscale/.gitignore
new file mode 100644
index 0000000000..fd91a1c6e3
--- /dev/null
+++ b/libswscale/.gitignore
@@ -0,0 +1,7 @@
+*.d
+*.exe
+*.ho
+*-example
+*-test
+libswscale*
+*.o
diff --git a/libswscale/bfin/.gitignore b/libswscale/bfin/.gitignore
new file mode 100644
index 0000000000..d57a7d62c6
--- /dev/null
+++ b/libswscale/bfin/.gitignore
@@ -0,0 +1,2 @@
+*.d
+*.ho
diff --git a/libswscale/mlib/.gitignore b/libswscale/mlib/.gitignore
new file mode 100644
index 0000000000..d57a7d62c6
--- /dev/null
+++ b/libswscale/mlib/.gitignore
@@ -0,0 +1,2 @@
+*.d
+*.ho
diff --git a/libswscale/ppc/.gitignore b/libswscale/ppc/.gitignore
new file mode 100644
index 0000000000..d57a7d62c6
--- /dev/null
+++ b/libswscale/ppc/.gitignore
@@ -0,0 +1,2 @@
+*.d
+*.ho
diff --git a/libswscale/sparc/.gitignore b/libswscale/sparc/.gitignore
new file mode 100644
index 0000000000..d57a7d62c6
--- /dev/null
+++ b/libswscale/sparc/.gitignore
@@ -0,0 +1,2 @@
+*.d
+*.ho
diff --git a/libswscale/x86/.gitignore b/libswscale/x86/.gitignore
new file mode 100644
index 0000000000..d57a7d62c6
--- /dev/null
+++ b/libswscale/x86/.gitignore
@@ -0,0 +1,2 @@
+*.d
+*.ho
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000000..ae7b4eacba
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,12 @@
+*.o
+*.d
+*.exe
+audiogen
+base64
+data
+rotozoom
+seek_test
+tiny_psnr
+videogen
+vsynth1
+vsynth2
diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 0000000000..535660960d
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1,18 @@
+*.o
+*.d
+*.exe
+cws2fws
+graph2dot
+lavfi-showfiltfmts
+pktdumper
+probetest
+qt-faststart
+trasher*.d
+*.exe
+cws2fws
+graph2dot
+lavfi-showfiltfmts
+pktdumper
+probetest
+qt-faststart
+trasher

From 11f6eebdd36b034312e12d97525af564fc10668c Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-ffmpeg@jannau.net>
Date: Tue, 18 Jan 2011 20:44:24 +0100
Subject: [PATCH 105/315] consolidate .gitignore patterns into a single file

Signed-off-by: Janne Grunau <janne-ffmpeg@jannau.net>
(cherry picked from commit 2c3589bfda036c7827ded0bf38b16dfe7630bae1)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 .gitignore                  | 30 ++++++++++++++++++++++++++++++
 doc/.gitignore              |  3 ---
 ffpresets/.gitignore        |  1 -
 libavcodec/.gitignore       |  8 --------
 libavcodec/alpha/.gitignore |  3 ---
 libavcodec/arm/.gitignore   |  3 ---
 libavcodec/avr32/.gitignore |  3 ---
 libavcodec/bfin/.gitignore  |  3 ---
 libavcodec/mips/.gitignore  |  3 ---
 libavcodec/mlib/.gitignore  |  3 ---
 libavcodec/ppc/.gitignore   |  3 ---
 libavcodec/ps2/.gitignore   |  3 ---
 libavcodec/sh4/.gitignore   |  3 ---
 libavcodec/sparc/.gitignore |  3 ---
 libavcodec/x86/.gitignore   |  5 -----
 libavcore/.gitignore        |  5 -----
 libavdevice/.gitignore      |  8 --------
 libavfilter/.gitignore      |  8 --------
 libavfilter/x86/.gitignore  |  1 -
 libavformat/.gitignore      |  8 --------
 libavutil/.gitignore        |  9 ---------
 libavutil/arm/.gitignore    |  3 ---
 libavutil/avr32/.gitignore  |  2 --
 libavutil/bfin/.gitignore   |  3 ---
 libavutil/mips/.gitignore   |  2 --
 libavutil/ppc/.gitignore    |  3 ---
 libavutil/sh4/.gitignore    |  3 ---
 libavutil/tomi/.gitignore   |  3 ---
 libavutil/x86/.gitignore    |  3 ---
 libpostproc/.gitignore      |  6 ------
 libswscale/.gitignore       |  7 -------
 libswscale/bfin/.gitignore  |  2 --
 libswscale/mlib/.gitignore  |  2 --
 libswscale/ppc/.gitignore   |  2 --
 libswscale/sparc/.gitignore |  2 --
 libswscale/x86/.gitignore   |  2 --
 tests/.gitignore            | 12 ------------
 tools/.gitignore            | 18 ------------------
 38 files changed, 30 insertions(+), 161 deletions(-)
 delete mode 100644 doc/.gitignore
 delete mode 100644 ffpresets/.gitignore
 delete mode 100644 libavcodec/.gitignore
 delete mode 100644 libavcodec/alpha/.gitignore
 delete mode 100644 libavcodec/arm/.gitignore
 delete mode 100644 libavcodec/avr32/.gitignore
 delete mode 100644 libavcodec/bfin/.gitignore
 delete mode 100644 libavcodec/mips/.gitignore
 delete mode 100644 libavcodec/mlib/.gitignore
 delete mode 100644 libavcodec/ppc/.gitignore
 delete mode 100644 libavcodec/ps2/.gitignore
 delete mode 100644 libavcodec/sh4/.gitignore
 delete mode 100644 libavcodec/sparc/.gitignore
 delete mode 100644 libavcodec/x86/.gitignore
 delete mode 100644 libavcore/.gitignore
 delete mode 100644 libavdevice/.gitignore
 delete mode 100644 libavfilter/.gitignore
 delete mode 100644 libavfilter/x86/.gitignore
 delete mode 100644 libavformat/.gitignore
 delete mode 100644 libavutil/.gitignore
 delete mode 100644 libavutil/arm/.gitignore
 delete mode 100644 libavutil/avr32/.gitignore
 delete mode 100644 libavutil/bfin/.gitignore
 delete mode 100644 libavutil/mips/.gitignore
 delete mode 100644 libavutil/ppc/.gitignore
 delete mode 100644 libavutil/sh4/.gitignore
 delete mode 100644 libavutil/tomi/.gitignore
 delete mode 100644 libavutil/x86/.gitignore
 delete mode 100644 libpostproc/.gitignore
 delete mode 100644 libswscale/.gitignore
 delete mode 100644 libswscale/bfin/.gitignore
 delete mode 100644 libswscale/mlib/.gitignore
 delete mode 100644 libswscale/ppc/.gitignore
 delete mode 100644 libswscale/sparc/.gitignore
 delete mode 100644 libswscale/x86/.gitignore
 delete mode 100644 tests/.gitignore
 delete mode 100644 tools/.gitignore

diff --git a/.gitignore b/.gitignore
index 64dc57bec0..eac1758ed9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,12 +4,42 @@
 *.d
 *.exe
 *.ho
+*-example
 *-test
 *_g
 config.*
+doc/*.1
+doc/*.html
+doc/*.pod
 doxy
 ffmpeg
 ffplay
 ffprobe
 ffserver
+libavcodec/libavcodec*
+libavcore/libavcore*
+libavdevice/libavdevice*
+libavfilter/libavfilter*
+libavformat/libavformat*
+libavutil/avconfig.h
+libavutil/libavutil*
+libpostproc/libpostproc*
+libswscale/libswscale*
+tests/audiogen
+tests/base64
+tests/data
+tests/rotozoom
+tests/seek_test
+tests/tiny_psnr
+tests/videogen
+tests/vsynth1
+tests/vsynth2
+tools/cws2fws
+tools/graph2dot
+tools/lavfi-showfiltfmts
+tools/pktdumper
+tools/probetest
+tools/qt-faststart
+tools/trasher
+tools/trasher*.d
 version.h
diff --git a/doc/.gitignore b/doc/.gitignore
deleted file mode 100644
index aeaee91f95..0000000000
--- a/doc/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.1
-*.html
-*.pod
diff --git a/ffpresets/.gitignore b/ffpresets/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/ffpresets/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/libavcodec/.gitignore b/libavcodec/.gitignore
deleted file mode 100644
index 40a1c87eec..0000000000
--- a/libavcodec/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-*.o
-*.d
-*.exe
-*.ho
-*-example
-*-test
-libavcodec*
-
diff --git a/libavcodec/alpha/.gitignore b/libavcodec/alpha/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/alpha/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/arm/.gitignore b/libavcodec/arm/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/arm/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/avr32/.gitignore b/libavcodec/avr32/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/avr32/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/bfin/.gitignore b/libavcodec/bfin/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/bfin/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/mips/.gitignore b/libavcodec/mips/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/mips/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/mlib/.gitignore b/libavcodec/mlib/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/mlib/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/ppc/.gitignore b/libavcodec/ppc/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/ppc/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/ps2/.gitignore b/libavcodec/ps2/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/ps2/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/sh4/.gitignore b/libavcodec/sh4/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/sh4/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/sparc/.gitignore b/libavcodec/sparc/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavcodec/sparc/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavcodec/x86/.gitignore b/libavcodec/x86/.gitignore
deleted file mode 100644
index eb249fb180..0000000000
--- a/libavcodec/x86/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.d
-*.exe
-*.ho
-*-test
-
diff --git a/libavcore/.gitignore b/libavcore/.gitignore
deleted file mode 100644
index 81b575239e..0000000000
--- a/libavcore/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.o
-*.d
-*.ho
-libavcore*
-
diff --git a/libavdevice/.gitignore b/libavdevice/.gitignore
deleted file mode 100644
index 5d53ca790f..0000000000
--- a/libavdevice/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-*.o
-*.d
-*.exe
-*.ho
-*-example
-*-test
-libavdevice*
-
diff --git a/libavfilter/.gitignore b/libavfilter/.gitignore
deleted file mode 100644
index f6d3cd0c1a..0000000000
--- a/libavfilter/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-*.o
-*.d
-*.exe
-*.ho
-*-example
-*-test
-libavfilter*
-
diff --git a/libavfilter/x86/.gitignore b/libavfilter/x86/.gitignore
deleted file mode 100644
index 8b13789179..0000000000
--- a/libavfilter/x86/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/libavformat/.gitignore b/libavformat/.gitignore
deleted file mode 100644
index 50e4684f77..0000000000
--- a/libavformat/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-*.o
-*.d
-*.exe
-*.ho
-*-example
-*-test
-libavformat*
-
diff --git a/libavutil/.gitignore b/libavutil/.gitignore
deleted file mode 100644
index 3e21b95006..0000000000
--- a/libavutil/.gitignore
+++ /dev/null
@@ -1,9 +0,0 @@
-*.d
-*.o
-*.exe
-*.ho
-*-example
-*-test
-libavutil*
-avconfig.h
-
diff --git a/libavutil/arm/.gitignore b/libavutil/arm/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavutil/arm/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavutil/avr32/.gitignore b/libavutil/avr32/.gitignore
deleted file mode 100644
index 2a6b22b026..0000000000
--- a/libavutil/avr32/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.ho
-
diff --git a/libavutil/bfin/.gitignore b/libavutil/bfin/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavutil/bfin/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavutil/mips/.gitignore b/libavutil/mips/.gitignore
deleted file mode 100644
index 2a6b22b026..0000000000
--- a/libavutil/mips/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.ho
-
diff --git a/libavutil/ppc/.gitignore b/libavutil/ppc/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavutil/ppc/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavutil/sh4/.gitignore b/libavutil/sh4/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavutil/sh4/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavutil/tomi/.gitignore b/libavutil/tomi/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavutil/tomi/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libavutil/x86/.gitignore b/libavutil/x86/.gitignore
deleted file mode 100644
index 854570fa24..0000000000
--- a/libavutil/x86/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.d
-*.ho
-
diff --git a/libpostproc/.gitignore b/libpostproc/.gitignore
deleted file mode 100644
index f0eb9a0fac..0000000000
--- a/libpostproc/.gitignore
+++ /dev/null
@@ -1,6 +0,0 @@
-*.d
-*.exe
-*.ho
-*-example
-*-test
-libpostproc*
diff --git a/libswscale/.gitignore b/libswscale/.gitignore
deleted file mode 100644
index fd91a1c6e3..0000000000
--- a/libswscale/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-*.d
-*.exe
-*.ho
-*-example
-*-test
-libswscale*
-*.o
diff --git a/libswscale/bfin/.gitignore b/libswscale/bfin/.gitignore
deleted file mode 100644
index d57a7d62c6..0000000000
--- a/libswscale/bfin/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.d
-*.ho
diff --git a/libswscale/mlib/.gitignore b/libswscale/mlib/.gitignore
deleted file mode 100644
index d57a7d62c6..0000000000
--- a/libswscale/mlib/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.d
-*.ho
diff --git a/libswscale/ppc/.gitignore b/libswscale/ppc/.gitignore
deleted file mode 100644
index d57a7d62c6..0000000000
--- a/libswscale/ppc/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.d
-*.ho
diff --git a/libswscale/sparc/.gitignore b/libswscale/sparc/.gitignore
deleted file mode 100644
index d57a7d62c6..0000000000
--- a/libswscale/sparc/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.d
-*.ho
diff --git a/libswscale/x86/.gitignore b/libswscale/x86/.gitignore
deleted file mode 100644
index d57a7d62c6..0000000000
--- a/libswscale/x86/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.d
-*.ho
diff --git a/tests/.gitignore b/tests/.gitignore
deleted file mode 100644
index ae7b4eacba..0000000000
--- a/tests/.gitignore
+++ /dev/null
@@ -1,12 +0,0 @@
-*.o
-*.d
-*.exe
-audiogen
-base64
-data
-rotozoom
-seek_test
-tiny_psnr
-videogen
-vsynth1
-vsynth2
diff --git a/tools/.gitignore b/tools/.gitignore
deleted file mode 100644
index 535660960d..0000000000
--- a/tools/.gitignore
+++ /dev/null
@@ -1,18 +0,0 @@
-*.o
-*.d
-*.exe
-cws2fws
-graph2dot
-lavfi-showfiltfmts
-pktdumper
-probetest
-qt-faststart
-trasher*.d
-*.exe
-cws2fws
-graph2dot
-lavfi-showfiltfmts
-pktdumper
-probetest
-qt-faststart
-trasher

From e332c41670dbdb49a748f07ccc60e37c8d8ebbcd Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 10 Feb 2011 14:09:35 +0100
Subject: [PATCH 106/315] also ignore *.so for vhook plugins

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index eac1758ed9..7a7555d189 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 .config
 .version
 *.o
+*.so
 *.d
 *.exe
 *.ho

From d6860fb653ed42a9d35e134f843f03cc049b74f1 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Sun, 13 Feb 2011 20:41:13 +0100
Subject: [PATCH 107/315] Fix crashes in vorbis decoding found by zzuf Fixes
 issue 2322.

Originally committed as revision 25591 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 3dde66752d59dfdd0f3727efd66e7202b3c75078)

Addresses: CVE-2010-4704
---
 libavcodec/vorbis_dec.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 98756572bf..541a406722 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -60,8 +60,8 @@ typedef struct vorbis_floor0_s vorbis_floor0;
 typedef struct vorbis_floor1_s vorbis_floor1;
 struct vorbis_context_s;
 typedef
-uint_fast8_t (* vorbis_floor_decode_func)
-             (struct vorbis_context_s *, vorbis_floor_data *, float *);
+int (* vorbis_floor_decode_func)
+    (struct vorbis_context_s *, vorbis_floor_data *, float *);
 typedef struct {
     uint_fast8_t floor_type;
     vorbis_floor_decode_func decode;
@@ -443,14 +443,14 @@ static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc) {
 
 // Process floors part
 
-static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
+static int vorbis_floor0_decode(vorbis_context *vc,
                                          vorbis_floor_data *vfu, float *vec);
 static void create_map( vorbis_context * vc, uint_fast8_t floor_number );
-static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc,
+static int vorbis_floor1_decode(vorbis_context *vc,
                                          vorbis_floor_data *vfu, float *vec);
 static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
     GetBitContext *gb=&vc->gb;
-    uint_fast16_t i,j,k;
+    int i,j,k;
 
     vc->floor_count=get_bits(gb, 6)+1;
 
@@ -1038,7 +1038,7 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext) {
 
 // Read and decode floor
 
-static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
+static int vorbis_floor0_decode(vorbis_context *vc,
                                          vorbis_floor_data *vfu, float *vec) {
     vorbis_floor0 * vf=&vfu->t0;
     float * lsp=vf->lsp;
@@ -1062,6 +1062,9 @@ static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
         }
         AV_DEBUG( "floor0 dec: booknumber: %u\n", book_idx );
         codebook=vc->codebooks[vf->book_list[book_idx]];
+        /* Invalid codebook! */
+        if (!codebook.codevectors)
+            return -1;
 
         while (lsp_len<vf->order) {
             int vec_off;
@@ -1151,7 +1154,7 @@ static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
     return 0;
 }
 
-static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc, vorbis_floor_data *vfu, float *vec) {
+static int vorbis_floor1_decode(vorbis_context *vc, vorbis_floor_data *vfu, float *vec) {
     vorbis_floor1 * vf=&vfu->t1;
     GetBitContext *gb=&vc->gb;
     uint_fast16_t range_v[4]={ 256, 128, 86, 64 };
@@ -1527,14 +1530,21 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
 
     for(i=0;i<vc->audio_channels;++i) {
         vorbis_floor *floor;
+        int ret;
         if (mapping->submaps>1) {
             floor=&vc->floors[mapping->submap_floor[mapping->mux[i]]];
         } else {
             floor=&vc->floors[mapping->submap_floor[0]];
         }
 
-        no_residue[i]=floor->decode(vc, &floor->data, ch_floor_ptr);
-        ch_floor_ptr+=blocksize/2;
+        ret = floor->decode(vc, &floor->data, ch_floor_ptr);
+
+        if (ret < 0) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "Invalid codebook in vorbis_floor_decode.\n");
+            return -1;
+        }
+        no_residue[i] = ret;
+        ch_floor_ptr += blocksize / 2;
     }
 
 // Nonzero vector propagate

From 329e816ed7903cf078c52aecd32a3be3b5dabbee Mon Sep 17 00:00:00 2001
From: Frank Barchard <fbarchard@google.com>
Date: Sun, 13 Feb 2011 21:38:45 +0100
Subject: [PATCH 108/315] Check rangebits to avoid a possible crash. Fixes
 issue 2548 (and Chrome issue 68115 and unknown CERT issues).

Originally committed as revision 26365 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 13184036a6b1b1d4b61c91118c0896e9ad4634c3)

Addresses: CVE-2011-0480

Conflicts:

	libavcodec/vorbis_dec.c
---
 libavcodec/vorbis_dec.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 541a406722..5b8b056393 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -466,6 +466,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
         if (floor_setup->floor_type==1) {
             uint_fast8_t maximum_class=0;
             uint_fast8_t rangebits;
+            uint_fast32_t rangemax;
             uint_fast16_t floor1_values=2;
 
             floor_setup->decode=vorbis_floor1_decode;
@@ -526,8 +527,15 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
 
 
             rangebits=get_bits(gb, 4);
+            rangemax = (1 << rangebits);
+            if (rangemax > vc->blocksize[1] / 2) {
+                av_log(vc->avccontext, AV_LOG_ERROR,
+                       "Floor value is too large for blocksize: %d (%d)\n",
+                       rangemax, vc->blocksize[1] / 2);
+                return -1;
+            }
             floor_setup->data.t1.list[0].x = 0;
-            floor_setup->data.t1.list[1].x = (1<<rangebits);
+            floor_setup->data.t1.list[1].x = rangemax;
 
             for(j=0;j<floor_setup->data.t1.partitions;++j) {
                 for(k=0;k<floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];++k,++floor1_values) {

From 04847a06c2f6f1316b553ea100c3f70b651587fd Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 13 Feb 2011 23:34:41 +0100
Subject: [PATCH 109/315] Update Changelog for 0.5.4 release.

---
 Changelog | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Changelog b/Changelog
index ef46c7f4d6..6d631fe3fc 100644
--- a/Changelog
+++ b/Changelog
@@ -1,6 +1,14 @@
 Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
+version 0.5.4:
+
+- Fix memory corruption in wmv parsing (addresses CVE-2010-3908)
+- Fix heap corruption crashes (addresses CVE-2011-0722)
+- Fix crashes in vorbis decoding found by zzuf (addresses CVE-2010-4704)
+- Fix another crash in vorbis decoder (addresses CVE-2011-0480, Chrome issue 68115)
+
+
 
 version 0.5.3:
 

From f7494394ee8b375c9962e7528e7b7de6db76518e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 9 Nov 2009 22:10:43 +0000
Subject: [PATCH 110/315] Make get_bits_left() available for use in libavcodec
 (was previously held private in dv.c for some reason). See "[PATCH]
 get_bits_left()" thread.

Originally committed as revision 20490 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit c47ca25e74bbe465cdc8b99d4f6ab4f0ad5e4229)
---
 libavcodec/bitstream.h | 5 +++++
 libavcodec/dv.c        | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
index e1ec934b28..8941da24cb 100644
--- a/libavcodec/bitstream.h
+++ b/libavcodec/bitstream.h
@@ -946,4 +946,9 @@ static inline int decode210(GetBitContext *gb){
         return 2 - get_bits1(gb);
 }
 
+static inline int get_bits_left(GetBitContext *gb)
+{
+    return gb->size_in_bits - get_bits_count(gb);
+}
+
 #endif /* AVCODEC_BITSTREAM_H */
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index 59ebb088e6..be4a746de5 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -414,11 +414,6 @@ static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5;
 /* see dv_88_areas and dv_248_areas for details */
 static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
 
-static inline int get_bits_left(GetBitContext *s)
-{
-    return s->size_in_bits - get_bits_count(s);
-}
-
 static inline int put_bits_left(PutBitContext* s)
 {
     return (s->buf_end - s->buf) * 8 - put_bits_count(s);

From 8069e2f6fbd79e3d3d2ba17f5f097475b43e2921 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sat, 19 Feb 2011 11:33:01 +0100
Subject: [PATCH 111/315] Fix invalid reads in VC1 decoder

Patch discussed and taken from https://roundup.ffmpeg.org/issue2584
(cherry picked from commit 2bbec1eda46d907605772a8b6e8263caa4bc4c82)

Change related to CVE-2011-0723
---
 libavcodec/vc1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 03257b8f37..619e9030cb 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -2366,7 +2366,7 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip, int *value,
     if (index != vc1_ac_sizes[codingset] - 1) {
         run = vc1_index_decode_table[codingset][index][0];
         level = vc1_index_decode_table[codingset][index][1];
-        lst = index >= vc1_last_decode_table[codingset];
+        lst = index >= vc1_last_decode_table[codingset] || get_bits_left(gb) < 0;
         if(get_bits1(gb))
             level = -level;
     } else {

From 808f9ce727fb05058a43de8d64539eddf5fa74d6 Mon Sep 17 00:00:00 2001
From: Kostya Shishkov <kostya.shishkov@gmail.com>
Date: Tue, 24 Nov 2009 06:05:41 +0000
Subject: [PATCH 112/315] Call avcodec_set_dimensions() instead of simply
 setting avctx->width/height when frame dimensions change in RV3/4.

Originally committed as revision 20595 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit d90aeeaf569e4a08c30b3d1d09c3cff3a86eb431)
---
 libavcodec/rv34.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 9fe3919ca1..4d8d9689c7 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1247,8 +1247,9 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
         if(s->width != r->si.width || s->height != r->si.height){
             av_log(s->avctx, AV_LOG_DEBUG, "Changing dimensions to %dx%d\n", r->si.width,r->si.height);
             MPV_common_end(s);
-            s->width  = s->avctx->width  = r->si.width;
-            s->height = s->avctx->height = r->si.height;
+            s->width  = r->si.width;
+            s->height = r->si.height;
+            avcodec_set_dimensions(s->avctx, s->width, s->height);
             if(MPV_common_init(s) < 0)
                 return -1;
             r->intra_types_hist = av_realloc(r->intra_types_hist, s->b4_stride * 4 * 2 * sizeof(*r->intra_types_hist));

From 2adad90ae767f2129d454885e268ff765812244a Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <Reinhard.Tartler@informatik.uni-erlangen.de>
Date: Fri, 18 Feb 2011 17:06:06 +0100
Subject: [PATCH 113/315] Amend Changelog for 0.5.4

---
 Changelog | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Changelog b/Changelog
index 6d631fe3fc..31e3bd5b21 100644
--- a/Changelog
+++ b/Changelog
@@ -6,7 +6,8 @@ version 0.5.4:
 - Fix memory corruption in wmv parsing (addresses CVE-2010-3908)
 - Fix heap corruption crashes (addresses CVE-2011-0722)
 - Fix crashes in vorbis decoding found by zzuf (addresses CVE-2010-4704)
-- Fix another crash in vorbis decoder (addresses CVE-2011-0480, Chrome issue 68115)
+- Fix another crash in vorbis decoding (addresses CVE-2011-0480, Chrome issue 68115)
+- Fix invalid reads in vc1 decoding (related to CVE-2011-0723)
 
 
From 31c8dcedb2b931ff8d70b125845a6674a51b36a9 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <Reinhard.Tartler@informatik.uni-erlangen.de>
Date: Fri, 18 Feb 2011 17:06:06 +0100
Subject: [PATCH 114/315] release notes for 0.5.4

---
 RELEASE | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/RELEASE b/RELEASE
index cdc923b907..6b769f80d7 100644
--- a/RELEASE
+++ b/RELEASE
@@ -124,3 +124,16 @@ This is (again) another maintenance only release that addresses a fix
 for seekable HTTP and an exploitable bug in the FLIC decoder
 (cf. CVE-2010-3429 for details). Distributors and system integrators are
 encouraged to update and share their patches against this branch.
+
+
+
+* 0.5.4 Feb 20, 2011
+
+General notes
+-------------
+
+This is the first release that we cut after git migration. It is another
+maintenance only release that addresses several security issue that have
+brought to our attention. In detail, fixes for RV30/40, WMV, Vorbis and
+VC1 have been backported for trunk. Distributors and system integrators
+are encouraged to update and share their patches against this branch.

From 2c4d6aeabc2d8574d62f8428ad10f6049b940302 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 20 Feb 2011 22:12:52 +0100
Subject: [PATCH 115/315] Bump version number for 0.5.4 release.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index be14282b7f..7d8568351b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.3
+0.5.4

From c5898d7c1d2e6079fda3e5f5da9345a2af8cbf68 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 6 Mar 2011 11:02:36 +0100
Subject: [PATCH 116/315] Fix a bunch of typos in the release documentation.

---
 Changelog | 10 +++++-----
 RELEASE   | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Changelog b/Changelog
index 31e3bd5b21..e40599cedb 100644
--- a/Changelog
+++ b/Changelog
@@ -3,11 +3,11 @@ releases are sorted from youngest to oldest.
 
 version 0.5.4:
 
-- Fix memory corruption in wmv parsing (addresses CVE-2010-3908)
+- Fix memory corruption in WMV parsing (addresses CVE-2010-3908)
 - Fix heap corruption crashes (addresses CVE-2011-0722)
-- Fix crashes in vorbis decoding found by zzuf (addresses CVE-2010-4704)
-- Fix another crash in vorbis decoding (addresses CVE-2011-0480, Chrome issue 68115)
-- Fix invalid reads in vc1 decoding (related to CVE-2011-0723)
+- Fix crashes in Vorbis decoding found by zzuf (addresses CVE-2010-4704)
+- Fix another crash in Vorbis decoding (addresses CVE-2011-0480, Chrome issue 68115)
+- Fix invalid reads in VC-1 decoding (related to CVE-2011-0723)
 
 
@@ -23,7 +23,7 @@ version 0.5.3:
 version 0.5.2:
 
 - Hurd support
-- PowerPC without Altivec compilation issues
+- PowerPC without AltiVec compilation issues
 - validate channels and samplerate in the Vorbis decoder
 
 
diff --git a/RELEASE b/RELEASE
index 6b769f80d7..09d4bd397a 100644
--- a/RELEASE
+++ b/RELEASE
@@ -109,7 +109,7 @@ FFmpeg library.
 General notes
 -------------
 
-This is a maintenance only release that addresses a small number of security
+This is a maintenance-only release that addresses a small number of security
 and portability issues. Distributors and system integrators are encouraged
 to update and share their patches against this branch.
 
@@ -120,20 +120,20 @@ to update and share their patches against this branch.
 General notes
 -------------
 
-This is (again) another maintenance only release that addresses a fix
+This is (again) another maintenance-only release that addresses a fix
 for seekable HTTP and an exploitable bug in the FLIC decoder
 (cf. CVE-2010-3429 for details). Distributors and system integrators are
 encouraged to update and share their patches against this branch.
 
 
-* 0.5.4 Feb 20, 2011
+* 0.5.4 Feb 24, 2011
 
 General notes
 -------------
 
 This is the first release that we cut after git migration. It is another
-maintenance only release that addresses several security issue that have
+maintenance-only release that addresses several security issues that were
 brought to our attention. In detail, fixes for RV30/40, WMV, Vorbis and
-VC1 have been backported for trunk. Distributors and system integrators
+VC-1 have been backported from trunk. Distributors and system integrators
 are encouraged to update and share their patches against this branch.

From 18c5fe919f4b1818ebdf405812c5a2d16174688f Mon Sep 17 00:00:00 2001
From: Kostya <kostya.shishkov@gmail.com>
Date: Tue, 15 Mar 2011 09:19:43 +0000
Subject: [PATCH 117/315] Do not attempt to decode APE file with no frames

This fixes invalid reads/writes with this sample:
http://packetstorm.linuxsecurity.com/1103-exploits/vlc105-dos.txt
(cherry picked from commit 8312e3fc9041027a33c8bc667bb99740fdf41dd5)
---
 libavformat/ape.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavformat/ape.c b/libavformat/ape.c
index fac967879a..3031e4767b 100644
--- a/libavformat/ape.c
+++ b/libavformat/ape.c
@@ -335,6 +335,10 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
             url_fskip(pb, ape->wavheaderlength);
     }
 
+    if(!ape->totalframes){
+        av_log(s, AV_LOG_ERROR, "No frames in the file!\n");
+        return AVERROR(EINVAL);
+    }
     if(ape->totalframes > UINT_MAX / sizeof(APEFrame)){
         av_log(s, AV_LOG_ERROR, "Too many frames: %d\n", ape->totalframes);
         return -1;

From f17b89278709423b7eb76d7ed5eec5f82df57329 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 17 Mar 2011 13:09:40 +0100
Subject: [PATCH 118/315] document APE patch

---
 Changelog | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Changelog b/Changelog
index e40599cedb..fbbabc2e4c 100644
--- a/Changelog
+++ b/Changelog
@@ -8,6 +8,8 @@ version 0.5.4:
 - Fix crashes in Vorbis decoding found by zzuf (addresses CVE-2010-4704)
 - Fix another crash in Vorbis decoding (addresses CVE-2011-0480, Chrome issue 68115)
 - Fix invalid reads in VC-1 decoding (related to CVE-2011-0723)
+- Do not attempt to decode APE file with no frames
+  (adresses http://packetstorm.linuxsecurity.com/1103-exploits/vlc105-dos.txt)
 
 
From 52ee20f2aa2936e0fb0a90e81c39135a8f2394e9 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 17 Mar 2011 13:10:27 +0100
Subject: [PATCH 119/315] update release date

---
 RELEASE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE b/RELEASE
index 09d4bd397a..2f7e2c6dde 100644
--- a/RELEASE
+++ b/RELEASE
@@ -127,7 +127,7 @@ encouraged to update and share their patches against this branch.
 
 
-* 0.5.4 Feb 24, 2011
+* 0.5.4 Mar 17, 2011
 
 General notes
 -------------

From eed5697f99927c78956fde6ee6f1ac87f3c6d261 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 21 Apr 2011 22:03:24 +0200
Subject: [PATCH 120/315] mjpeg: Detect overreads in mjpeg_decode_scan() and
 error out.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Ronald S. Bultje <rbultje@google.com>
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/mjpegdec.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 145719c16e..e5c9f38b93 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -784,6 +784,10 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i
             if (s->restart_interval && !s->restart_count)
                 s->restart_count = s->restart_interval;
 
+            if(get_bits_count(&s->gb)>s->gb.size_in_bits){
+                av_log(s->avctx, AV_LOG_ERROR, "overread %d\n", get_bits_count(&s->gb) - s->gb.size_in_bits);
+                return -1;
+            }
             for(i=0;i<nb_components;i++) {
                 uint8_t *ptr;
                 int n, h, v, x, y, c, j;

From 8210ee22e2f69d540f8835523dd78a205ae6c2a6 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 21 Apr 2011 22:04:21 +0200
Subject: [PATCH 121/315] AMV: Fix possibly exploitable crash. Reported-at:
 Thu, 21 Apr 2011 14:38:25 +0000 Reported-by: Dominic Chell
 <Dominic.Chell@ngssecure.com> Signed-off-by: Michael Niedermayer
 <michaelni@gmx.at>

---
 libavcodec/sp5xdec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 920b32d8cd..f1e436c905 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -88,7 +88,6 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
     recoded[j++] = 0xFF;
     recoded[j++] = 0xD9;
 
-    avctx->flags &= ~CODEC_FLAG_EMU_EDGE;
     i = ff_mjpeg_decode_frame(avctx, data, data_size, recoded, j);
 
     av_free(recoded);

From 24cd7c5df78c53c1d1a36b81fb130594e84b3f12 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 25 Mar 2011 02:24:32 +0100
Subject: [PATCH 122/315] Fix apparently exploitable race condition.
 Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

---
 libavutil/log.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavutil/log.c b/libavutil/log.c
index 4bb9652c2c..fb773d0712 100644
--- a/libavutil/log.c
+++ b/libavutil/log.c
@@ -33,7 +33,8 @@ void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
 {
     static int print_prefix=1;
     static int count;
-    static char line[1024], prev[1024];
+    static char prev[1024];
+    char line[1024];
     AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
     if(level>av_log_level)
         return;

From 04888edef344b09daaabbc30b3fa5ab32d3bd866 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 10 Aug 2011 18:52:11 +0100
Subject: [PATCH 123/315] cavs: fix some crashes with invalid bitstreams

This removes all valgrind-reported invalid writes with one
specific test file.

Fixes http://www.ocert.org/advisories/ocert-2011-002.html

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit 4a71da0f3ab7f5542decd11c81994f849d5b2c78)

Fixes CVE-2011-3362, CVE-2011-3973, CVE-2011-3974

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/cavsdec.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index a1895bcb88..8d30040d74 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -130,12 +130,14 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
                 r++;
             mask = -(level_code & 1);
             level = (level^mask) - mask;
-        } else {
+        } else if (level_code >= 0) {
             level = r->rltab[level_code][0];
             if(!level) //end of block signal
                 break;
             run   = r->rltab[level_code][1];
             r += r->rltab[level_code][2];
+        } else {
+            break;
         }
         level_buf[i] = level;
         run_buf[i] = run;
@@ -189,7 +191,8 @@ static inline int decode_residual_inter(AVSContext *h) {
 
 static int decode_mb_i(AVSContext *h, int cbp_code) {
     GetBitContext *gb = &h->s.gb;
-    int block, pred_mode_uv;
+    unsigned pred_mode_uv;
+    int block;
     uint8_t top[18];
     uint8_t *left = NULL;
     uint8_t *d;
@@ -445,6 +448,8 @@ static inline int check_for_slice(AVSContext *h) {
     if((show_bits_long(gb,24+align) & 0xFFFFFF) == 0x000001) {
         skip_bits_long(gb,24+align);
         h->stc = get_bits(gb,8);
+        if (h->stc >= h->mb_height)
+            return 0;
         decode_slice_header(h,gb);
         return 1;
     }
@@ -648,7 +653,7 @@ static int cavs_decode_frame(AVCodecContext * avctx,void *data, int *data_size,
     buf_end = buf + buf_size;
     for(;;) {
         buf_ptr = ff_find_start_code(buf_ptr,buf_end, &stc);
-        if(stc & 0xFFFFFE00)
+        if((stc & 0xFFFFFE00) || buf_ptr == buf_end)
             return FFMAX(0, buf_ptr - buf - s->parse_context.last_index);
         input_size = (buf_end - buf_ptr)*8;
         switch(stc) {

From 4f07a3aa2c6b7356c28646692261aa9080605fcc Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 28 Jul 2011 14:59:54 +0200
Subject: [PATCH 124/315] Fix memory (re)allocation in matroskadec.c, related
 to MSVR-11-0080.

Whitespace of the patch cleaned up by Aurel
Some of the issues have been reported by Steve Manzuik / Microsoft Vulnerability Research (MSVR)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

(cherry picked from commit 956c901c68eff78288f40e3c8f41ee2fa081d4a8)

Further suggestions from Kostya <kostya.shishkov@gmail.com> have been
implemented by Reinhard Tartler <siretart@tauware.de>

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 77d2ef13a8fa630e5081f14bde3fd20f84c90aec)

NB: MSVR-11-0080 doesn't seem to exist. This issue seems to be known
as MSVR11-011 instead.

Fixes: CVE-2011-3504

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/matroskadec.c | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index ac12f1ea66..d9ffec3727 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -752,11 +752,15 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
     uint32_t id = syntax->id;
     uint64_t length;
     int res;
+    void *newelem;
 
     data = (char *)data + syntax->data_offset;
     if (syntax->list_elem_size) {
         EbmlList *list = data;
-        list->elem = av_realloc(list->elem, (list->nb_elem+1)*syntax->list_elem_size);
+        newelem = av_realloc(list->elem, (list->nb_elem+1)*syntax->list_elem_size);
+        if (!newelem)
+            return AVERROR(ENOMEM);
+        list->elem = newelem;
         data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
         memset(data, 0, syntax->list_elem_size);
         list->nb_elem++;
@@ -873,6 +877,7 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
     uint8_t* data = *buf;
     int isize = *buf_size;
     uint8_t* pkt_data = NULL;
+    uint8_t* newpktdata;
     int pkt_size = isize;
     int result = 0;
     int olen;
@@ -899,7 +904,12 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
         zstream.avail_in = isize;
         do {
             pkt_size *= 3;
-            pkt_data = av_realloc(pkt_data, pkt_size);
+            newpktdata = av_realloc(pkt_data, pkt_size);
+            if (!newpktdata) {
+                inflateEnd(&zstream);
+                goto failed;
+            }
+            pkt_data = newpktdata;
             zstream.avail_out = pkt_size - zstream.total_out;
             zstream.next_out = pkt_data + zstream.total_out;
             result = inflate(&zstream, Z_NO_FLUSH);
@@ -920,7 +930,12 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
         bzstream.avail_in = isize;
         do {
             pkt_size *= 3;
-            pkt_data = av_realloc(pkt_data, pkt_size);
+            newpktdata = av_realloc(pkt_data, pkt_size);
+            if (!newpktdata) {
+                BZ2_bzDecompressEnd(&bzstream);
+                goto failed;
+            }
+            pkt_data = newpktdata;
             bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
             bzstream.next_out = pkt_data + bzstream.total_out_lo32;
             result = BZ2_bzDecompress(&bzstream);
@@ -975,13 +990,17 @@ static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
     }
 }
 
-static void matroska_merge_packets(AVPacket *out, AVPacket *in)
+static int matroska_merge_packets(AVPacket *out, AVPacket *in)
 {
-    out->data = av_realloc(out->data, out->size+in->size);
+    void *newdata = av_realloc(out->data, out->size+in->size);
+    if (!newdata)
+        return AVERROR(ENOMEM);
+    out->data = newdata;
     memcpy(out->data+out->size, in->data, in->size);
     out->size += in->size;
     av_destruct_packet(in);
     av_free(in);
+    return 0;
 }
 
 static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
@@ -1462,11 +1481,13 @@ static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
         av_free(matroska->packets[0]);
         if (matroska->num_packets > 1) {
+            void *newpackets;
             memmove(&matroska->packets[0], &matroska->packets[1],
                     (matroska->num_packets - 1) * sizeof(AVPacket *));
-            matroska->packets =
-                av_realloc(matroska->packets, (matroska->num_packets - 1) *
-                           sizeof(AVPacket *));
+            newpackets = av_realloc(matroska->packets,
+                            (matroska->num_packets - 1) * sizeof(AVPacket *));
+            if (newpackets)
+                matroska->packets = newpackets;
         } else {
             av_freep(&matroska->packets);
         }

From 46f9a6203aa37b0f04a63379cb04304c596dc779 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Fri, 1 Jul 2011 02:38:28 +0200
Subject: [PATCH 125/315] Fix possible double free when encoding using xvid.
 (cherry picked from commit 315f0e3fd8dcbd1362276b7407dad2e97cccc4b7)

---
 libavcodec/libxvidff.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/libxvidff.c b/libavcodec/libxvidff.c
index bdf70a09df..0bbb7123c9 100644
--- a/libavcodec/libxvidff.c
+++ b/libavcodec/libxvidff.c
@@ -485,6 +485,7 @@ av_cold int ff_xvid_encode_close(AVCodecContext *avctx) {
     if( x->twopassbuffer != NULL ) {
         av_free(x->twopassbuffer);
         av_free(x->old_twopassbuffer);
+        avctx->stats_out = NULL;
     }
     if( x->twopassfile != NULL )
         av_free(x->twopassfile);

From 80fb9f2c57c1b76c76948ee3dad6b5bb6f381436 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 10 Aug 2011 17:29:51 +0200
Subject: [PATCH 126/315] cavsdec: avoid possible crash with crafted input

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 9f06c1c61e876e930753da200bfe835817e30a53)
---
 libavcodec/cavsdec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index 8d30040d74..25b97bfde8 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -165,7 +165,7 @@ static inline int decode_residual_inter(AVSContext *h) {
 
     /* get coded block pattern */
     int cbp= get_ue_golomb(&h->s.gb);
-    if(cbp > 63){
+    if(cbp > 63U){
         av_log(h->s.avctx, AV_LOG_ERROR, "illegal inter cbp\n");
         return -1;
     }
@@ -225,7 +225,7 @@ static int decode_mb_i(AVSContext *h, int cbp_code) {
     /* get coded block pattern */
     if(h->pic_type == FF_I_TYPE)
         cbp_code = get_ue_golomb(gb);
-    if(cbp_code > 63){
+    if(cbp_code > 63U){
         av_log(h->s.avctx, AV_LOG_ERROR, "illegal intra cbp\n");
         return -1;
     }

From 70f01f12626caacd3926e11ac1ebc705e67016e7 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Fri, 9 Sep 2011 13:24:19 -0700
Subject: [PATCH 127/315] indeo2: init_get_bits size in bits instead of bytes
 (cherry picked from commit 68ca330cbd479111db9cb7649d7530ad59f04cc8)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/indeo2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/indeo2.c b/libavcodec/indeo2.c
index 40c561a559..e1bfd08870 100644
--- a/libavcodec/indeo2.c
+++ b/libavcodec/indeo2.c
@@ -162,7 +162,7 @@ static int ir2_decode_frame(AVCodecContext *avctx,
 #endif
     start = 48; /* hardcoded for now */
 
-    init_get_bits(&s->gb, buf + start, buf_size - start);
+    init_get_bits(&s->gb, buf + start, (buf_size - start) * 8);
 
     if (s->decode_delta) { /* intraframe */
         ir2_decode_plane(s, avctx->width, avctx->height,

From 457f869b73ff9c49486c31a6574eb6601f8eb2d1 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Fri, 9 Sep 2011 13:26:49 -0700
Subject: [PATCH 128/315] indeo2: fail if input buffer too small (cherry picked
 from commit b7ce4f1d1c3add86ece7ca595ea6c4a10b471055)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/indeo2.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavcodec/indeo2.c b/libavcodec/indeo2.c
index e1bfd08870..8ee6a86e9e 100644
--- a/libavcodec/indeo2.c
+++ b/libavcodec/indeo2.c
@@ -153,6 +153,13 @@ static int ir2_decode_frame(AVCodecContext *avctx,
         return -1;
     }
 
+    start = 48; /* hardcoded for now */
+
+    if (start >= buf_size) {
+        av_log(s->avctx, AV_LOG_ERROR, "input buffer size too small (%d)\n", buf_size);
+        return AVERROR_INVALIDDATA;
+    }
+
     s->decode_delta = buf[18];
 
     /* decide whether frame uses deltas or not */
@@ -160,7 +167,6 @@ static int ir2_decode_frame(AVCodecContext *avctx,
     for (i = 0; i < buf_size; i++)
         buf[i] = ff_reverse[buf[i]];
 #endif
-    start = 48; /* hardcoded for now */
 
     init_get_bits(&s->gb, buf + start, (buf_size - start) * 8);
 

From 7cb35d49547e64c0bbd7e66e489e954f16b823c5 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Fri, 9 Sep 2011 14:50:33 -0700
Subject: [PATCH 129/315] cljr: init_get_bits size in bits instead of bytes
 (cherry picked from commit 0c1f5b93d9b97c4cc3684ba91a040e90bfc760d2)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/cljr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/cljr.c b/libavcodec/cljr.c
index 9f7ab61efd..39ac7d6d26 100644
--- a/libavcodec/cljr.c
+++ b/libavcodec/cljr.c
@@ -60,7 +60,7 @@ static int decode_frame(AVCodecContext *avctx,
     p->pict_type= FF_I_TYPE;
     p->key_frame= 1;
 
-    init_get_bits(&a->gb, buf, buf_size);
+    init_get_bits(&a->gb, buf, buf_size * 8);
 
     for(y=0; y<avctx->height; y++){
         uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ];

From ecd6fa11c2cde5ca3fb787a2cfeb594d6205684b Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 12 Sep 2011 20:50:13 +0200
Subject: [PATCH 130/315] Check for invalid packet size in the smacker demuxer.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit e055932f5636a82275837968eea9c8fcb5bca474)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/smacker.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index 8ec28bcd3b..fa4f6312c5 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -292,6 +292,8 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
             flags >>= 1;
         }
+        if (frame_size < 0)
+            return AVERROR_INVALIDDATA;
         if (av_new_packet(pkt, frame_size + 768))
             return AVERROR(ENOMEM);
         if(smk->frm_size[smk->cur_frame] & 1)

From 19431d4d4ee2bb727d3810d40db846bb82dba658 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 11 Sep 2011 19:17:40 +0200
Subject: [PATCH 131/315] ape demuxer: fix segfault on memory allocation
 failure.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 273aab99bf7be2bcda95dd64101c2317ee0fcb99)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/ape.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/ape.c b/libavformat/ape.c
index 3031e4767b..7511d936da 100644
--- a/libavformat/ape.c
+++ b/libavformat/ape.c
@@ -356,6 +356,8 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
 
     if (ape->seektablelength > 0) {
         ape->seektable = av_malloc(ape->seektablelength);
+        if (!ape->seektable)
+            return AVERROR(ENOMEM);
         for (i = 0; i < ape->seektablelength / sizeof(uint32_t); i++)
             ape->seektable[i] = get_le32(pb);
     }

From 2e17744a90d8ea040bf55f9b77bab2e6e17e4db9 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 12 Sep 2011 20:50:34 +0200
Subject: [PATCH 132/315] Fixed off by one packet size allocation in the
 smacker demuxer.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit a92d0fa5d234582583d41b67dddecffc2c819573)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/smacker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index fa4f6312c5..a34dfde75c 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -294,7 +294,7 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
         }
         if (frame_size < 0)
             return AVERROR_INVALIDDATA;
-        if (av_new_packet(pkt, frame_size + 768))
+        if (av_new_packet(pkt, frame_size + 769))
             return AVERROR(ENOMEM);
         if(smk->frm_size[smk->cur_frame] & 1)
             palchange |= 2;

From f6d3dfe78b431a5c17c3a102c338aa0b2d095ac3 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 12 Sep 2011 23:46:49 +0200
Subject: [PATCH 133/315] Check and propagate errors when VLC trees cannot be
 built in smacker decoder.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit 9676ffba8346791f494451e68d2a3b37a2918a9b)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/smacker.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 03241cf921..8cef6423c1 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -133,10 +133,10 @@ static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx
             return -1;
         }
         b1 = get_bits_count(gb);
-        i1 = get_vlc2(gb, ctx->v1->table, SMKTREE_BITS, 3);
+        i1 = ctx->v1->table ? get_vlc2(gb, ctx->v1->table, SMKTREE_BITS, 3) : 0;
         b1 = get_bits_count(gb) - b1;
         b2 = get_bits_count(gb);
-        i2 = get_vlc2(gb, ctx->v2->table, SMKTREE_BITS, 3);
+        i2 = ctx->v2->table ? get_vlc2(gb, ctx->v2->table, SMKTREE_BITS, 3) : 0;
         b2 = get_bits_count(gb) - b2;
         val = ctx->recode1[i1] | (ctx->recode2[i2] << 8);
         if(val == ctx->escapes[0]) {
@@ -289,7 +289,8 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->mmap_tbl[0] = 0;
         smk->mmap_last[0] = smk->mmap_last[1] = smk->mmap_last[2] = 1;
     } else {
-        smacker_decode_header_tree(smk, &gb, &smk->mmap_tbl, smk->mmap_last, mmap_size);
+        if (smacker_decode_header_tree(smk, &gb, &smk->mmap_tbl, smk->mmap_last, mmap_size))
+            return -1;
     }
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping MCLR tree\n");
@@ -297,7 +298,8 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->mclr_tbl[0] = 0;
         smk->mclr_last[0] = smk->mclr_last[1] = smk->mclr_last[2] = 1;
     } else {
-        smacker_decode_header_tree(smk, &gb, &smk->mclr_tbl, smk->mclr_last, mclr_size);
+        if (smacker_decode_header_tree(smk, &gb, &smk->mclr_tbl, smk->mclr_last, mclr_size))
+            return -1;
     }
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping FULL tree\n");
@@ -305,7 +307,8 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->full_tbl[0] = 0;
         smk->full_last[0] = smk->full_last[1] = smk->full_last[2] = 1;
     } else {
-        smacker_decode_header_tree(smk, &gb, &smk->full_tbl, smk->full_last, full_size);
+        if (smacker_decode_header_tree(smk, &gb, &smk->full_tbl, smk->full_last, full_size))
+            return -1;
     }
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping TYPE tree\n");
@@ -313,7 +316,8 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->type_tbl[0] = 0;
         smk->type_last[0] = smk->type_last[1] = smk->type_last[2] = 1;
     } else {
-        smacker_decode_header_tree(smk, &gb, &smk->type_tbl, smk->type_last, type_size);
+        if (smacker_decode_header_tree(smk, &gb, &smk->type_tbl, smk->type_last, type_size))
+            return -1;
     }
 
     return 0;
@@ -527,8 +531,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return -1;
     }
 
-    decode_header_trees(c);
-
+    if (decode_header_trees(c))
+        return -1;
 
     return 0;
 }

From f936799f0b72799000cd842922a05005f70cb553 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 12 Sep 2011 23:49:36 +0200
Subject: [PATCH 134/315] Check for invalid VLC value in smacker decoder.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit 6489455495fc5bfbebcfe3f57e5d4fdd6a781091)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/smacker.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 8cef6423c1..716b63fc6f 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -138,6 +138,8 @@ static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx
         b2 = get_bits_count(gb);
         i2 = ctx->v2->table ? get_vlc2(gb, ctx->v2->table, SMKTREE_BITS, 3) : 0;
         b2 = get_bits_count(gb) - b2;
+        if (i1 < 0 || i2 < 0)
+            return -1;
         val = ctx->recode1[i1] | (ctx->recode2[i2] << 8);
         if(val == ctx->escapes[0]) {
             ctx->last[0] = hc->current;

From 52b8edc94c9fa4c613fdfdc0a53512d0ac26345f Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 11 Sep 2011 23:26:12 +0200
Subject: [PATCH 135/315] oggdec: fix out of bound write in the ogg demuxer

Between ogg_save() and ogg_restore() calls, the number of streams
could have been reduced.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
(cherry picked from commit 0e7efb9d23c3641d50caa288818e8c27647ce74d)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/oggdec.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 54406f5479..a810b95dee 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -90,14 +90,24 @@ ogg_restore (AVFormatContext * s, int discard)
     ogg->state = ost->next;
 
     if (!discard){
+        struct ogg_stream *old_streams = ogg->streams;
+
         for (i = 0; i < ogg->nstreams; i++)
             av_free (ogg->streams[i].buf);
 
         url_fseek (bc, ost->pos, SEEK_SET);
         ogg->curidx = ost->curidx;
         ogg->nstreams = ost->nstreams;
-        memcpy(ogg->streams, ost->streams,
-               ost->nstreams * sizeof(*ogg->streams));
+        ogg->streams = av_realloc (ogg->streams,
+                                   ogg->nstreams * sizeof (*ogg->streams));
+
+        if (ogg->streams) {
+            memcpy(ogg->streams, ost->streams,
+                   ost->nstreams * sizeof(*ogg->streams));
+        } else {
+            av_free(old_streams);
+            ogg->nstreams = 0;
+        }
     }
 
     av_free (ost);

From 9cda3d79154554c803877fcf22b1b2ffe66f5b1e Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 18 Sep 2011 00:03:08 +0200
Subject: [PATCH 136/315] rv10: Reject slices that do not have the same type
 as the first one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This prevents crashes with some corrupted bitstreams.

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit 4a29b471869353c3077fb4b25b6518eb1047afb7)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/rv10.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index 71a25d650f..ddf0f1db1b 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -642,6 +642,11 @@ static int rv10_decode_packet(AVCodecContext *avctx,
         if(MPV_frame_start(s, avctx) < 0)
             return -1;
         ff_er_frame_start(s);
+    } else {
+        if (s->current_picture_ptr->pict_type != s->pict_type) {
+            av_log(s->avctx, AV_LOG_ERROR, "Slice type mismatch\n");
+            return -1;
+        }
     }
 
 #ifdef DEBUG

From ef93642aac0ddf17ecd35e3a1e463a5e7f4f9ac5 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 17 Sep 2011 23:43:58 +0200
Subject: [PATCH 137/315] rv34: Avoid NULL dereference on corrupted bitstream
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rv34_decode_slice() can return without allocating any pictures.

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit d0f6ab0298f2309c6104626787ed73416298b019)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/rv34.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 4d8d9689c7..3c0e51983b 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1452,7 +1452,7 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
             break;
     }
 
-    if(last){
+    if(last && s->current_picture_ptr){
         if(r->loop_filter)
             r->loop_filter(r, s->mb_height - 1);
         ff_er_frame_end(s);

From a9ded3d272775619760b5f0125b76d019ac3186a Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 19 Sep 2011 22:48:53 +0200
Subject: [PATCH 138/315] rv34: Check for invalid slice offsets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit 4cc7732386eb36661ed22d1200339b38a5fa60bc)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/rv34.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 3c0e51983b..13415e0cd9 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1401,8 +1401,9 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
         slice_count = avctx->slice_count;
 
     //parse first slice header to check whether this frame can be decoded
-    if(get_slice_offset(avctx, slices_hdr, 0) > buf_size){
-        av_log(avctx, AV_LOG_ERROR, "Slice offset is greater than frame size\n");
+    if(get_slice_offset(avctx, slices_hdr, 0) < 0 ||
+       get_slice_offset(avctx, slices_hdr, 0) > buf_size){
+        av_log(avctx, AV_LOG_ERROR, "Slice offset is invalid\n");
         return -1;
     }
     init_get_bits(&s->gb, buf+get_slice_offset(avctx, slices_hdr, 0), buf_size-get_slice_offset(avctx, slices_hdr, 0));
@@ -1430,8 +1431,8 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
         else
             size= get_slice_offset(avctx, slices_hdr, i+1) - offset;
 
-        if(offset > buf_size){
-            av_log(avctx, AV_LOG_ERROR, "Slice offset is greater than frame size\n");
+        if(offset < 0 || offset > buf_size || size < 0){
+            av_log(avctx, AV_LOG_ERROR, "Slice offset is invalid\n");
             break;
         }
 

From 4e0315b30ecb41b6fa349fba0d8eb1adaa4fe3bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 17 Jul 2011 15:22:36 +0200
Subject: [PATCH 139/315] Check extradata size on resolution change.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ignore resolution change if resolution not defined in extradata.

Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
(cherry picked from commit 09c5f990bc7629dfbee8c760fd485936c60a7b40)
---
 libavcodec/rv30.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index e1b3ad1db6..a2dc57e2e1 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -51,6 +51,11 @@ static int rv30_parse_slice_header(RV34DecContext *r, GetBitContext *gb, SliceIn
     skip_bits1(gb);
     si->pts = get_bits(gb, 13);
     rpr = get_bits(gb, r->rpr);
+    if (r->s.avctx->extradata_size < 8 + rpr*2) {
+        av_log(r->s.avctx, AV_LOG_WARNING,
+               "Extradata does not contain selected resolution\n");
+        rpr = 0;
+    }
     if(rpr){
         w = r->s.avctx->extradata[6 + rpr*2] << 2;
         h = r->s.avctx->extradata[7 + rpr*2] << 2;

From 1415ebf031af502835262752b9325c13da0db70b Mon Sep 17 00:00:00 2001
From: Chris Rankin <rankincj@yahoo.com>
Date: Wed, 7 Sep 2011 10:17:30 +0100
Subject: [PATCH 140/315] qcelpdec: fix the return value of
 qcelp_decode_frame(). (cherry picked from commit
 04c13dca8812e8302686887b6e8201d4ad25b7d8)

---
 libavcodec/qcelpdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index 59eff1f919..3a18470b65 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -802,7 +802,7 @@ erasure:
 
     *data_size = 160 * sizeof(*outbuffer);
 
-    return *data_size;
+    return buf_size;
 }
 
 AVCodec qcelp_decoder =

From cc885682e361a5da269cd371c51224b9d5f8104f Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 10 Sep 2011 13:28:13 +0200
Subject: [PATCH 141/315] Fixed dereference of NULL pointer in motionpixels
 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 824f98f442996eaee9204b132752cf5114fc94cf)
---
 libavcodec/motionpixels.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c
index f69dcf95fe..2839d0718c 100644
--- a/libavcodec/motionpixels.c
+++ b/libavcodec/motionpixels.c
@@ -325,7 +325,8 @@ static int mp_decode_frame(AVCodecContext *avctx,
     if (sz == 0)
         goto end;
 
-    init_vlc(&mp->vlc, mp->max_codes_bits, mp->codes_count, &mp->codes[0].size, sizeof(HuffCode), 1, &mp->codes[0].code, sizeof(HuffCode), 4, 0);
+    if (init_vlc(&mp->vlc, mp->max_codes_bits, mp->codes_count, &mp->codes[0].size, sizeof(HuffCode), 1, &mp->codes[0].code, sizeof(HuffCode), 4, 0))
+        goto end;
     mp_decode_frame_helper(mp, &gb);
     free_vlc(&mp->vlc);
 

From 03db051b43a3f7e703a03018be72bc6fdd272641 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Wed, 7 Sep 2011 21:43:03 +0200
Subject: [PATCH 142/315] Fixed segfault with wavpack decoder on corrupted
 decorrelation terms sub-blocks.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit 8bfea4ab4e2cb32bc7bf6f697ee30a238c65d296)
---
 libavcodec/wavpack.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index 12eac33fe7..19e5e057aa 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -536,12 +536,13 @@ static int wavpack_decode_frame(AVCodecContext *avctx,
         }
         switch(id & WP_IDF_MASK){
         case WP_ID_DECTERMS:
-            s->terms = size;
-            if(s->terms > MAX_TERMS){
+            if(size > MAX_TERMS){
                 av_log(avctx, AV_LOG_ERROR, "Too many decorrelation terms\n");
+                s->terms = 0;
                 buf += ssize;
                 continue;
             }
+            s->terms = size;
             for(i = 0; i < s->terms; i++) {
                 s->decorr[s->terms - i - 1].value = (*buf & 0x1F) - 5;
                 s->decorr[s->terms - i - 1].delta = *buf >> 5;

From b261ebfd220b05854d0a0cccde30eddb7a7e23dd Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 11 Sep 2011 18:54:01 +0200
Subject: [PATCH 143/315] Fixed segfaults on corrupted smacker streams in the
 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit d07ac1853da29ea696243160e02154ebf758d1ee)
---
 libavcodec/smacker.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 716b63fc6f..f790e08214 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -660,6 +660,8 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     } else { //8-bit data
         for(i = stereo; i >= 0; i--)
             pred[i] = get_bits(&gb, 8);
+        if (stereo + unp_size > data_size)
+            return -1;
         for(i = 0; i < stereo; i++)
             *samples++ = (pred[i] - 0x80) << 8;
         for(i = 0; i < unp_size; i++) {

From 6108f04d4f33611c5f878dd245fd268e18a06f98 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 11 Sep 2011 18:51:52 +0200
Subject: [PATCH 144/315] Fixed segfault on corrupted smacker streams in the
 demuxer.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit d0121e8d969cde74fa7dbd96d3602109b051e701)
---
 libavformat/smacker.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index a34dfde75c..b4fc5f7176 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -284,6 +284,10 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt)
                 frame_size -= 4;
                 smk->curstream++;
                 smk->bufs[smk->curstream] = av_realloc(smk->bufs[smk->curstream], size);
+                if (!smk->bufs[smk->curstream]) {
+                    smk->buf_sizes[smk->curstream] = 0;
+                    return AVERROR(ENOMEM);
+                }
                 smk->buf_sizes[smk->curstream] = size;
                 ret = get_buffer(s->pb, smk->bufs[smk->curstream], size);
                 if(ret != size)

From eb6b0ed8be31047002954abe659d2018a4ce3338 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 11 Sep 2011 19:17:43 +0200
Subject: [PATCH 145/315] Fixed invalid read access on extra data in cinepak
 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit dc255275f6293a060518271a151e1ce75499e874)
---
 libavcodec/cinepak.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index d0f5adb7fc..c248f00ec9 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c
@@ -336,7 +336,8 @@ static int cinepak_decode (CinepakContext *s)
              * If the frame header is followed by the bytes FE 00 00 06 00 00 then
              * this is probably one of the two known files that have 6 extra bytes
              * after the frame header. Else, assume 2 extra bytes. */
-            if ((s->data[10] == 0xFE) &&
+            if (s->size >= 16 &&
+                (s->data[10] == 0xFE) &&
                 (s->data[11] == 0x00) &&
                 (s->data[12] == 0x00) &&
                 (s->data[13] == 0x06) &&

From d8439f045220119d718968cf1023f5cd8182f877 Mon Sep 17 00:00:00 2001
From: Kostya Shishkov <kostya.shishkov@gmail.com>
Date: Mon, 12 Sep 2011 11:39:53 +0200
Subject: [PATCH 146/315] rv34: check that subsequent slices have the same type
 as first one.

This prevents some crashes when corrupted bitstream reports e.g. P-type
slice in I-frame. Official RealVideo decoder demands all slices to be
of the same type too.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 23a1f0c59241465ba30103388029a7afc0ead909)
---
 libavcodec/rv34.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 13415e0cd9..da20dbb10b 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1269,6 +1269,13 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
             r->next_pts = r->cur_pts;
         }
         s->mb_x = s->mb_y = 0;
+    } else {
+        int slice_type = r->si.type ? r->si.type : FF_I_TYPE;
+
+        if (slice_type != s->pict_type) {
+            av_log(s->avctx, AV_LOG_ERROR, "Slice type mismatch\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     r->si.end = end;

From d6f8b65417f2170829a774ce07df7fdd34b32367 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 12 Sep 2011 20:58:35 +0200
Subject: [PATCH 147/315] segafilm: Check for memory allocation failures in
 segafilm demuxer.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 7cbe02575868e7d25acf3d319ece664702700f0a)
---
 libavformat/segafilm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
index ae1263cf98..b802b33461 100644
--- a/libavformat/segafilm.c
+++ b/libavformat/segafilm.c
@@ -167,6 +167,8 @@ static int film_read_header(AVFormatContext *s,
     if(film->sample_count >= UINT_MAX / sizeof(film_sample))
         return -1;
     film->sample_table = av_malloc(film->sample_count * sizeof(film_sample));
+    if (!film->sample_table)
+        return AVERROR(ENOMEM);
 
     for(i=0; i<s->nb_streams; i++)
         av_set_pts_info(s->streams[i], 33, 1, film->base_clock);
@@ -238,6 +240,10 @@ static int film_read_packet(AVFormatContext *s,
             av_free(film->stereo_buffer);
             film->stereo_buffer_size = sample->sample_size;
             film->stereo_buffer = av_malloc(film->stereo_buffer_size);
+            if (!film->stereo_buffer) {
+                film->stereo_buffer_size = 0;
+                return AVERROR(ENOMEM);
+            }
         }
 
         pkt->pos= url_ftell(pb);

From 71132596ae098bd5d7937efc9090038d25f326e8 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Mon, 12 Sep 2011 21:09:57 +0200
Subject: [PATCH 148/315] segafilm: Fix potential division by 0 on corrupted
 segafilm streams in the demuxer.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/segafilm.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
index b802b33461..c1da78998d 100644
--- a/libavformat/segafilm.c
+++ b/libavformat/segafilm.c
@@ -111,11 +111,16 @@ static int film_read_header(AVFormatContext *s,
         film->audio_samplerate = AV_RB16(&scratch[24]);
         film->audio_channels = scratch[21];
         film->audio_bits = scratch[22];
-        if (film->audio_bits == 8)
-            film->audio_type = CODEC_ID_PCM_S8;
-        else if (film->audio_bits == 16)
-            film->audio_type = CODEC_ID_PCM_S16BE;
-        else
+        if (scratch[23] == 2)
+            film->audio_type = CODEC_ID_ADPCM_ADX;
+        else if (film->audio_channels > 0) {
+            if (film->audio_bits == 8)
+                film->audio_type = CODEC_ID_PCM_S8;
+            else if (film->audio_bits == 16)
+                film->audio_type = CODEC_ID_PCM_S16BE;
+            else
+                film->audio_type = CODEC_ID_NONE;
+        } else
             film->audio_type = CODEC_ID_NONE;
     }
 
@@ -189,8 +194,12 @@ static int film_read_header(AVFormatContext *s,
             film->sample_table[i].pts *= film->base_clock;
             film->sample_table[i].pts /= film->audio_samplerate;
 
-            audio_frame_counter += (film->sample_table[i].sample_size /
-                (film->audio_channels * film->audio_bits / 8));
+            if (film->audio_type == CODEC_ID_ADPCM_ADX)
+                audio_frame_counter += (film->sample_table[i].sample_size * 32 /
+                    (18 * film->audio_channels));
+            else if (film->audio_type != CODEC_ID_NONE)
+                audio_frame_counter += (film->sample_table[i].sample_size /
+                    (film->audio_channels * film->audio_bits / 8));
         } else {
             film->sample_table[i].stream = film->video_stream_index;
             film->sample_table[i].pts = AV_RB32(&scratch[8]) & 0x7FFFFFFF;

From 38423fe0b760de19bf71c017e87f87c982551341 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 12 Sep 2011 23:45:21 +0200
Subject: [PATCH 149/315] smacker: add forgotten * found by fenrir

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f98edc73c599badaa0c075fbffb519a150d03d80)
---
 libavcodec/smacker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index f790e08214..95a9beb0a9 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -660,7 +660,7 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     } else { //8-bit data
         for(i = stereo; i >= 0; i--)
             pred[i] = get_bits(&gb, 8);
-        if (stereo + unp_size > data_size)
+        if (stereo + unp_size > *data_size)
             return -1;
         for(i = 0; i < stereo; i++)
             *samples++ = (pred[i] - 0x80) << 8;

From 8135c3552894b096a9f9989dc310ece7bb529403 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 13 Feb 2011 00:19:06 +0000
Subject: [PATCH 150/315] Fix MMX rgb24 to yuv conversion with gcc 4.6

When built with gcc 4.6, the MMX rgb24 to yuv conversion gives
wrong output.  The compiler produces this warning:

libswscale/swscale_template.c:1885:5: warning: use of memory input without lvalue in asm operand 4 is deprecated

Changing the memory operand to a register makes it work.

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit f344903ca5ce28a833fdd656bc1ed5b16d97e7e9)

Conflicts:

	libswscale/swscale_template.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libswscale/swscale_template.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 6f2e243052..9016778a9c 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -1739,7 +1739,7 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width,
 static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
 {
     __asm__ volatile(
-        "movq                    24+%4, %%mm6       \n\t"
+        "movq                   24(%4), %%mm6       \n\t"
         "mov                        %3, %%"REG_a"   \n\t"
         "pxor                    %%mm7, %%mm7       \n\t"
         "1:                                         \n\t"
@@ -1750,9 +1750,9 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *
         "punpcklbw               %%mm7, %%mm1       \n\t"
         "movq                    %%mm0, %%mm2       \n\t"
         "movq                    %%mm1, %%mm3       \n\t"
-        "pmaddwd                    %4, %%mm0       \n\t"
-        "pmaddwd                  8+%4, %%mm1       \n\t"
-        "pmaddwd                 16+%4, %%mm2       \n\t"
+        "pmaddwd                  (%4), %%mm0       \n\t"
+        "pmaddwd                 8(%4), %%mm1       \n\t"
+        "pmaddwd                16(%4), %%mm2       \n\t"
         "pmaddwd                 %%mm6, %%mm3       \n\t"
         "paddd                   %%mm1, %%mm0       \n\t"
         "paddd                   %%mm3, %%mm2       \n\t"
@@ -1764,9 +1764,9 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *
         "punpcklbw               %%mm7, %%mm3       \n\t"
         "movq                    %%mm1, %%mm4       \n\t"
         "movq                    %%mm3, %%mm5       \n\t"
-        "pmaddwd                    %4, %%mm1       \n\t"
-        "pmaddwd                  8+%4, %%mm3       \n\t"
-        "pmaddwd                 16+%4, %%mm4       \n\t"
+        "pmaddwd                  (%4), %%mm1       \n\t"
+        "pmaddwd                 8(%4), %%mm3       \n\t"
+        "pmaddwd                16(%4), %%mm4       \n\t"
         "pmaddwd                 %%mm6, %%mm5       \n\t"
         "paddd                   %%mm3, %%mm1       \n\t"
         "paddd                   %%mm5, %%mm4       \n\t"
@@ -1789,7 +1789,7 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *
         "add                        $4, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dstU+width), "r" (dstV+width), "g" (-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
+    : "r" (dstU+width), "r" (dstV+width), "g" (-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
     : "%"REG_a
     );
 }

From 1330a8a1cb28fe1119e37f1006d3141e8dcfa65c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= <mans@mansr.com>
Date: Thu, 21 Jan 2010 12:59:22 +0000
Subject: [PATCH 151/315] Make DECLARE_ALIGNED macros work with external array
 specifiers

The macro implementation might need the name of the variable being
declared for compiler-specific syntax.  Moving array specifiers outside
the macro invocation allows this to work.

Originally committed as revision 21363 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 8a24e98d506f0f44ec58e06291fa0fce703fb6a8)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavutil/internal.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavutil/internal.h b/libavutil/internal.h
index f5f769e2c0..792fd29a6c 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -256,11 +256,11 @@ if((y)<(x)){\
 }
 
 #if defined(__ICC) || defined(__SUNPRO_C)
-    #define DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n)))
+    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
 #elif defined(__GNUC__)
-    #define DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n)))
-    #define DECLARE_ASM_CONST(n,t,v)    static const t v attribute_used __attribute__ ((aligned (n)))
+    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ASM_CONST(n,t,v)    static const t attribute_used __attribute__ ((aligned (n))) v
 #elif defined(_MSC_VER)
     #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
     #define DECLARE_ASM_CONST(n,t,v)    __declspec(align(n)) static const t v

From 9463a287920e220cfb82035415581c4e951cad84 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Sun, 30 Jan 2011 01:04:41 -0800
Subject: [PATCH 152/315] Fix ff_imdct_calc_sse() on gcc-4.6

Gcc 4.6 only preserves the first value when using an array with an "m"
constraint.

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit 770c410fbb8e1b87ce8ad7f3d7eddaa55e2b8295)

Conflicts:

	libavcodec/x86/fft_sse.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/x86/fft_sse.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/fft_sse.c b/libavcodec/x86/fft_sse.c
index 3d9f1c5145..918fdf299a 100644
--- a/libavcodec/x86/fft_sse.c
+++ b/libavcodec/x86/fft_sse.c
@@ -22,7 +22,7 @@
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dsputil.h"
 
-static const int m1m1m1m1[4] __attribute__((aligned(16))) =
+DECLARE_ASM_CONST(16, int, m1m1m1m1)[4] =
     { 1 << 31, 1 << 31, 1 << 31, 1 << 31 };
 
 void ff_fft_dispatch_sse(FFTComplex *z, int nbits);
@@ -182,7 +182,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
     j = -n;
     k = n-16;
     __asm__ volatile(
-        "movaps %4, %%xmm7 \n"
+        "movaps "MANGLE(m1m1m1m1)", %%xmm7 \n"
         "1: \n"
         "movaps       (%2,%1), %%xmm0 \n"
         "movaps       (%3,%0), %%xmm1 \n"
@@ -195,8 +195,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
         "add $16, %0 \n"
         "jl 1b \n"
         :"+r"(j), "+r"(k)
-        :"r"(output+n4), "r"(output+n4*3),
-         "m"(*m1m1m1m1)
+        :"r"(output+n4), "r"(output+n4*3)
     );
 }
 

From 0f2735e839f33af4fe9e2120f908eb31cdfedc34 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 5 Nov 2011 12:53:16 +0100
Subject: [PATCH 153/315] Release notes and changelog for 0.5.5

---
 Changelog | 11 +++++++++++
 RELEASE   | 16 ++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/Changelog b/Changelog
index fbbabc2e4c..173cc00acf 100644
--- a/Changelog
+++ b/Changelog
@@ -1,6 +1,17 @@
 Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
+
+version 0.5.5:
+
+- Fix memory (re)allocation in matroskadec.c (MSVR11-011/CVE-2011-3504)
+- Fix some crashes with invalid bitstreams in the CAVS decoder
+  (CVE-2011-3362, CVE-2011-3973, CVE-2011-3974)
+- Compilation fixes for gcc-4.6, testsuite now passes again
+- Detect and handle overreads in the MJPEG decoder.
+
+
+
 version 0.5.4:
 
 - Fix memory corruption in WMV parsing (addresses CVE-2010-3908)
diff --git a/RELEASE b/RELEASE
index 2f7e2c6dde..75099adfac 100644
--- a/RELEASE
+++ b/RELEASE
@@ -137,3 +137,19 @@ maintenance-only release that addresses several security issues that were
 brought to our attention. In detail, fixes for RV30/40, WMV, Vorbis and
 VC-1 have been backported from trunk. Distributors and system integrators
 are encouraged to update and share their patches against this branch.
+
+
+
+* 0.5.5 Nov 11, 2011
+
+General notes
+-------------
+
+This maintenance-only release addresses several security issues that
+were brought to our attention. In detail, issues in the MJPEG decoder,
+the CAVS decoder (CVE-2011-3362, CVE-2011-3973, CVE-2011-3974), and the
+Matroska decoder (MSVR11-011/CVE-2011-3504) have been
+corrected. Additionally, this release contains fixes for compilation with
+gcc-4.6. Distributors and system integrators are encouraged to update
+and share their patches against this branch.
+

From d6bf79993fe67021584263f87b8a41f9edcec579 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 5 Nov 2011 12:57:22 +0100
Subject: [PATCH 154/315] update version

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 7d8568351b..d1d899fa33 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.4
+0.5.5

From 648dc68098a2d484ebf7b4c48baa3d08e34adef0 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 17 Sep 2011 16:56:33 +0200
Subject: [PATCH 155/315] Reject audio tracks with invalid interleaver
 parameters in RM demuxer.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 4907f813581acd6cf68f1be9eb163464503e8208)
(cherry picked from commit 24e0a9e451e1aae427307a919d78f6790f4e413c)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/rmdec.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index 5e3b3c760c..65bc79c332 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c
@@ -154,8 +154,9 @@ static int rm_read_audio_stream_info(AVFormatContext *s, ByteIOContext *pb,
             ast->audio_framesize = st->codec->block_align;
             st->codec->block_align = coded_framesize;
 
-            if(ast->audio_framesize >= UINT_MAX / sub_packet_h){
-                av_log(s, AV_LOG_ERROR, "ast->audio_framesize * sub_packet_h too large\n");
+            if (ast->audio_framesize <= 0 || sub_packet_h <= 0 ||
+                ast->audio_framesize >= UINT_MAX / sub_packet_h){
+                av_log(s, AV_LOG_ERROR, "ast->audio_framesize * sub_packet_h is invalid\n");
                 return -1;
             }
 
@@ -185,8 +186,9 @@ static int rm_read_audio_stream_info(AVFormatContext *s, ByteIOContext *pb,
             ast->audio_framesize = st->codec->block_align;
             st->codec->block_align = ast->sub_packet_size;
 
-            if(ast->audio_framesize >= UINT_MAX / sub_packet_h){
-                av_log(s, AV_LOG_ERROR, "rm->audio_framesize * sub_packet_h too large\n");
+            if (ast->audio_framesize <= 0 || sub_packet_h <= 0 ||
+                ast->audio_framesize >= UINT_MAX / sub_packet_h){
+                av_log(s, AV_LOG_ERROR, "rm->audio_framesize * sub_packet_h is invalid\n");
                 return -1;
             }
 

From 1883249be396b8dd6b67f8750749bef61dd24525 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 17 Sep 2011 19:40:25 +0200
Subject: [PATCH 156/315] rv34: check for size mismatch

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 35f38b3ab9d755aede5bce8abbe1cb9c07027f8a)
(cherry picked from commit ed9e561490d70e317659f9e406c7920242e509eb)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/rv34.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index da20dbb10b..c4e36211f7 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1276,6 +1276,10 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
             av_log(s->avctx, AV_LOG_ERROR, "Slice type mismatch\n");
             return AVERROR_INVALIDDATA;
         }
+        if (s->width != r->si.width || s->height != r->si.height) {
+            av_log(s->avctx, AV_LOG_ERROR, "Size mismatch\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     r->si.end = end;

From 8ef917c0337a07fdd1768f9425bdf78693704b1c Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 17 Sep 2011 21:53:21 +0200
Subject: [PATCH 157/315] check all svq3_get_ue_golomb() returns.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 979bea13003ef489d95d2538ac2fb1c26c6f103b)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/rv30.c |  4 ++--
 libavcodec/rv40.c |  5 ++++-
 libavcodec/svq3.c | 12 ++++++------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index a2dc57e2e1..0d25eac159 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -79,7 +79,7 @@ static int rv30_decode_intra_types(RV34DecContext *r, GetBitContext *gb, int8_t
     for(i = 0; i < 4; i++, dst += r->s.b4_stride - 4){
         for(j = 0; j < 4; j+= 2){
             int code = svq3_get_ue_golomb(gb) << 1;
-            if(code >= 81*2){
+            if(code >= 81U*2U){
                 av_log(r->s.avctx, AV_LOG_ERROR, "Incorrect intra prediction code\n");
                 return -1;
             }
@@ -108,7 +108,7 @@ static int rv30_decode_mb_info(RV34DecContext *r)
     GetBitContext *gb = &s->gb;
     int code = svq3_get_ue_golomb(gb);
 
-    if(code > 11){
+    if(code > 11U){
         av_log(s->avctx, AV_LOG_ERROR, "Incorrect MB type code\n");
         return -1;
     }
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index 8e1a470954..31c9ca3596 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -207,8 +207,11 @@ static int rv40_decode_mb_info(RV34DecContext *r)
     int blocks[RV34_MB_TYPES] = {0};
     int count = 0;
 
-    if(!r->s.mb_skip_run)
+    if(!r->s.mb_skip_run) {
         r->s.mb_skip_run = svq3_get_ue_golomb(gb) + 1;
+        if(r->s.mb_skip_run > (unsigned)s->mb_num)
+            return -1;
+    }
 
     if(--r->s.mb_skip_run)
          return RV34_MB_SKIP;
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index bef7075a26..5d4d4119e0 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -202,7 +202,7 @@ static inline int svq3_decode_block(GetBitContext *gb, DCTELEM *block,
     for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
         for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
 
-          if (vlc == INVALID_VLC)
+          if (vlc < 0)
               return -1;
 
           sign = (vlc & 0x1) - 1;
@@ -220,7 +220,7 @@ static inline int svq3_decode_block(GetBitContext *gb, DCTELEM *block,
                   level = ((vlc + 9) >> 2) - run;
               }
           } else {
-              if (vlc < 16) {
+              if (vlc < 16U) {
                   run   = svq3_dct_tables[intra][vlc].run;
                   level = svq3_dct_tables[intra][vlc].level;
               } else if (intra) {
@@ -549,7 +549,7 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
             for (i = 0; i < 16; i+=2) {
                 vlc = svq3_get_ue_golomb(&s->gb);
 
-                if (vlc >= 25){
+                if (vlc >= 25U){
                     av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc);
                     return -1;
                 }
@@ -620,7 +620,7 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
     }
 
     if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == FF_B_TYPE)) {
-        if ((vlc = svq3_get_ue_golomb(&s->gb)) >= 48){
+        if ((vlc = svq3_get_ue_golomb(&s->gb)) >= 48U){
             av_log(h->s.avctx, AV_LOG_ERROR, "cbp_vlc=%d\n", vlc);
             return -1;
         }
@@ -630,7 +630,7 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
     if (IS_INTRA16x16(mb_type) || (s->pict_type != FF_I_TYPE && s->adaptive_quant && cbp)) {
         s->qscale += svq3_get_se_golomb(&s->gb);
 
-        if (s->qscale > 31){
+        if (s->qscale > 31U){
             av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
             return -1;
         }
@@ -727,7 +727,7 @@ static int svq3_decode_slice_header(H264Context *h)
         skip_bits_long(&s->gb, 0);
     }
 
-    if ((i = svq3_get_ue_golomb(&s->gb)) == INVALID_VLC || i >= 3){
+    if ((i = svq3_get_ue_golomb(&s->gb)) >= 3U){
         av_log(h->s.avctx, AV_LOG_ERROR, "illegal slice type %d \n", i);
         return -1;
     }

From 25bc1108c2d3466358da8d3e27d08dbdb9840ea2 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Wed, 21 Sep 2011 20:46:29 +0200
Subject: [PATCH 158/315] Check output buffer size in nellymoser decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 741ec30bd2385f794efa9fafa84d39a917f2574e)
(cherry picked from commit 533dbaa55b7d45d5ca76f9ed46f5690282f86ea9)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/nellymoserdec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index 94e6129877..ec35369916 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -154,6 +154,7 @@ static int decode_tag(AVCodecContext * avctx,
                       void *data, int *data_size,
                       const uint8_t * buf, int buf_size) {
     NellyMoserDecodeContext *s = avctx->priv_data;
+    int data_max = *data_size;
     int blocks, i;
     int16_t* samples;
     *data_size = 0;
@@ -177,6 +178,8 @@ static int decode_tag(AVCodecContext * avctx,
     }
 
     for (i=0 ; i<blocks ; i++) {
+        if ((i + 1) * NELLY_SAMPLES * sizeof(int16_t) > data_max)
+            return i > 0 ? i * NELLY_BLOCK_LEN : -1;
         nelly_decode_block(s, &buf[i*NELLY_BLOCK_LEN], s->float_buf);
         s->dsp.float_to_int16(&samples[i*NELLY_SAMPLES], s->float_buf, NELLY_SAMPLES);
         *data_size += NELLY_SAMPLES*sizeof(int16_t);

From b24c2e59fec0342a57af86eaec3080e264894276 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Wed, 21 Sep 2011 20:46:33 +0200
Subject: [PATCH 159/315] Release old pictures after a resolution change in
 vp5/6 decoder

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit dba20b84784a7931b7eac50ced1d43e86801bde9)
(cherry picked from commit c9c6e5f4e8680b7b7801dd6943590ae9cd6bfd89)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vp56.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index ad11b5289a..65ddb8b421 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -519,6 +519,16 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         if (!res)
             return -1;
 
+        if (res == 2) {
+            int i;
+            for (i = 0; i < 4; i++) {
+                if (s->frames[i].data[0])
+                    avctx->release_buffer(avctx, &s->frames[i]);
+            }
+            if (is_alpha)
+                return -1;
+        }
+
         if (!is_alpha) {
             p->reference = 1;
             if (avctx->get_buffer(avctx, p) < 0) {

From 07df40db6e9cc34127a13ad0553eb88314f82f7c Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 24 Sep 2011 23:16:18 +0200
Subject: [PATCH 160/315] Check for invalid update parameters in vmd video
 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit e7aed1280ea14b60fceae04d71dfd03e1daf2d04)
(cherry picked from commit 1ed90c84f6ab75af91b08436cefb8ea464f8495b)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vmdav.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavcodec/vmdav.c b/libavcodec/vmdav.c
index 1921c81ca2..a63afb0cdc 100644
--- a/libavcodec/vmdav.c
+++ b/libavcodec/vmdav.c
@@ -206,6 +206,16 @@ static void vmd_decode(VmdVideoContext *s)
     frame_y = AV_RL16(&s->buf[8]);
     frame_width = AV_RL16(&s->buf[10]) - frame_x + 1;
     frame_height = AV_RL16(&s->buf[12]) - frame_y + 1;
+    if (frame_x < 0 || frame_width < 0 ||
+        frame_x >= s->avctx->width ||
+        frame_width > s->avctx->width ||
+        frame_x + frame_width > s->avctx->width)
+        return;
+    if (frame_y < 0 || frame_height < 0 ||
+        frame_y >= s->avctx->height ||
+        frame_height > s->avctx->height ||
+        frame_y + frame_height > s->avctx->height)
+        return;
 
     if ((frame_width == s->avctx->width && frame_height == s->avctx->height) &&
         (frame_x || frame_y)) {

From 240546a185931426d241662dbd109f02d64e4107 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Fri, 30 Sep 2011 00:05:51 +0200
Subject: [PATCH 161/315] Check for out of bounds writes in the Delphine
 Software International CIN decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 3035c4034b6af3ad47f921e3385196e1b9d44ddf)
(cherry picked from commit 6e774cf67e6f30feb9b3dec11713d6b6dc0b521c)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/dsicinav.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/dsicinav.c b/libavcodec/dsicinav.c
index f8093fccf2..0d9fe0502e 100644
--- a/libavcodec/dsicinav.c
+++ b/libavcodec/dsicinav.c
@@ -215,6 +215,8 @@ static int cinvideo_decode_frame(AVCodecContext *avctx,
 
     /* handle palette */
     if (palette_type == 0) {
+        if (palette_colors_count > 256)
+            return AVERROR_INVALIDDATA;
         for (i = 0; i < palette_colors_count; ++i) {
             cin->palette[i] = bytestream_get_le24(&buf);
             bitmap_frame_size -= 3;

From 635256a324f4644bbfa2f4332e950a183f82c4c9 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Fri, 30 Sep 2011 00:05:53 +0200
Subject: [PATCH 162/315] Fix out of bound writes in fix_bitshift() of the
 shorten decoder.

The data pointers s->decoded[*] already take into account s->nwrap.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f42b3195d3f2692a4dfc0a8668bb4ac35301f2ed)
(cherry picked from commit 107ea3057eb8de8a38c45c2f7181c42ea694b187)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/shorten.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index 053f5c2ed1..f8d2ff97fe 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -147,7 +147,7 @@ static void fix_bitshift(ShortenContext *s, int32_t *buffer)
 
     if (s->bitshift != 0)
         for (i = 0; i < s->blocksize; i++)
-            buffer[s->nwrap + i] <<= s->bitshift;
+            buffer[i] <<= s->bitshift;
 }
 
 
From 2e1e3c1e41b67b11d06e077255e31886550510c6 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 1 Oct 2011 00:44:54 +0200
Subject: [PATCH 163/315] Check for corrupted data in avs demuxer.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 1cce7def0a8eff2e7db294b7d195a0fb1a5043b0)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/avs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/avs.c b/libavformat/avs.c
index 1fcb19fdde..878898edbc 100644
--- a/libavformat/avs.c
+++ b/libavformat/avs.c
@@ -163,6 +163,8 @@ static int avs_read_packet(AVFormatContext * s, AVPacket * pkt)
             sub_type = get_byte(s->pb);
             type = get_byte(s->pb);
             size = get_le16(s->pb);
+            if (size < 4)
+                return AVERROR_INVALIDDATA;
             avs->remaining_frame_size -= size;
 
             switch (type) {

From 62da9203fd0c26e075c59eb6a82006de22fdbdc1 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 1 Oct 2011 00:44:55 +0200
Subject: [PATCH 164/315] Check for out of bound writes in the avs demuxer.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 5d44c061cf511d97be5fac8d76be2f3915c6e798)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/avs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/avs.c b/libavformat/avs.c
index 878898edbc..eda7f9d2f7 100644
--- a/libavformat/avs.c
+++ b/libavformat/avs.c
@@ -169,6 +169,8 @@ static int avs_read_packet(AVFormatContext * s, AVPacket * pkt)
 
             switch (type) {
             case AVS_PALETTE:
+                if (size - 4 > sizeof(palette))
+                    return AVERROR_INVALIDDATA;
                 ret = get_buffer(s->pb, palette, size - 4);
                 if (ret < size - 4)
                     return AVERROR(EIO);

From 3699a46ed7179601740a24bd9975abd2409ebf31 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 1 Oct 2011 00:45:05 +0200
Subject: [PATCH 165/315] Check for out of bound writes in the QDM2 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 4a7876c6e4e62e94d51e364ba99aae4da7671238)
(cherry picked from commit b08df314dca6946ed644caacb9d3a533a054c0f6)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qdm2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index a3373a16d9..69be061350 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1821,6 +1821,8 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
 
     avctx->channels = s->nb_channels = s->channels = AV_RB32(extradata);
     extradata += 4;
+    if (s->channels > MPA_MAX_CHANNELS)
+        return AVERROR_INVALIDDATA;
 
     avctx->sample_rate = AV_RB32(extradata);
     extradata += 4;
@@ -1843,6 +1845,8 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     // something like max decodable tones
     s->group_order = av_log2(s->group_size) + 1;
     s->frame_size = s->group_size / 16; // 16 iterations per super block
+    if (s->frame_size > FF_ARRAY_ELEMS(s->output_buffer) / 2)
+        return AVERROR_INVALIDDATA;
 
     s->sub_sampling = s->fft_order - 7;
     s->frequency_range = 255 / (1 << (2 - s->sub_sampling));

From 1a6f024520d667a9fbf63e701cc029a6d556d76e Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@elivagar.org>
Date: Sat, 1 Oct 2011 00:43:05 +0200
Subject: [PATCH 166/315] Prevent block size from increasing in the shorten
 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit b399cbfba5d901608c18e1a2d48a24c30541a634)
(cherry picked from commit 55a96a984ec65736475a8577a158abc5c48fd50a)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/shorten.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index f8d2ff97fe..6aacd5c6e4 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -469,9 +469,15 @@ static int shorten_decode_frame(AVCodecContext *avctx,
             case FN_BITSHIFT:
                 s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
                 break;
-            case FN_BLOCKSIZE:
-                s->blocksize = get_uint(s, av_log2(s->blocksize));
+            case FN_BLOCKSIZE: {
+                int blocksize = get_uint(s, av_log2(s->blocksize));
+                if (blocksize > s->blocksize) {
+                    av_log(avctx, AV_LOG_ERROR, "Increasing block size is not supported\n");
+                    return AVERROR_PATCHWELCOME;
+                }
+                s->blocksize = blocksize;
                 break;
+            }
             case FN_QUIT:
                 *data_size = 0;
                 return buf_size;

From 8e6173c76a33ae3358314eace94e3e854c4c2555 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 2 Oct 2011 00:38:27 +0200
Subject: [PATCH 167/315] Check for out of bound accesses in the 4xm decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 9c661e952fbcbf044709f9a7031c68cc4860336b)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/4xm.c | 59 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 46 insertions(+), 13 deletions(-)

diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 5c96baaf3f..e33001f306 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -133,7 +133,9 @@ typedef struct FourXContext{
     GetBitContext pre_gb;          ///< ac/dc prefix
     GetBitContext gb;
     const uint8_t *bytestream;
+    const uint8_t *bytestream_end;
     const uint16_t *wordstream;
+    const uint16_t *wordstream_end;
     int mv[256];
     VLC pre_vlc;
     int last_dc;
@@ -308,6 +310,8 @@ static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int lo
     assert(code>=0 && code<=6);
 
     if(code == 0){
+        if (f->bytestream_end - f->bytestream < 1)
+            return;
         src += f->mv[ *f->bytestream++ ];
         if(start > src || src > end){
             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
@@ -325,15 +329,23 @@ static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int lo
     }else if(code == 3 && f->version<2){
         mcdc(dst, src, log2w, h, stride, 1, 0);
     }else if(code == 4){
+        if (f->bytestream_end - f->bytestream < 1)
+            return;
         src += f->mv[ *f->bytestream++ ];
         if(start > src || src > end){
             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
             return;
         }
+        if (f->wordstream_end - f->wordstream < 1)
+            return;
         mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++));
     }else if(code == 5){
+        if (f->wordstream_end - f->wordstream < 1)
+            return;
         mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++));
     }else if(code == 6){
+        if (f->wordstream_end - f->wordstream < 2)
+            return;
         if(log2w){
             dst[0] = le2me_16(*f->wordstream++);
             dst[1] = le2me_16(*f->wordstream++);
@@ -355,6 +367,8 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
 
     if(f->version>1){
         extra=20;
+        if (length < extra)
+            return -1;
         bitstream_size= AV_RL32(buf+8);
         wordstream_size= AV_RL32(buf+12);
         bytestream_size= AV_RL32(buf+16);
@@ -365,11 +379,10 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
         bytestream_size= FFMAX(length - bitstream_size - wordstream_size, 0);
     }
 
-    if(bitstream_size+ bytestream_size+ wordstream_size + extra != length
-       || bitstream_size  > (1<<26)
-       || bytestream_size > (1<<26)
-       || wordstream_size > (1<<26)
-       ){
+    if (bitstream_size > length ||
+        bytestream_size > length - bitstream_size ||
+        wordstream_size > length - bytestream_size - bitstream_size ||
+        extra > length - bytestream_size - bitstream_size - wordstream_size){
         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
         bitstream_size+ bytestream_size+ wordstream_size - length);
         return -1;
@@ -380,7 +393,9 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
     init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
 
     f->wordstream= (const uint16_t*)(buf + extra + bitstream_size);
+    f->wordstream_end= f->wordstream + wordstream_size/2;
     f->bytestream= buf + extra + bitstream_size + wordstream_size;
+    f->bytestream_end = f->bytestream + bytestream_size;
 
     init_mv(f);
 
@@ -509,7 +524,7 @@ static int decode_i_mb(FourXContext *f){
     return 0;
 }
 
-static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const buf){
+static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const buf, int buf_size){
     int frequency[512];
     uint8_t flag[512];
     int up[512];
@@ -517,6 +532,7 @@ static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const
     int bits_tab[257];
     int start, end;
     const uint8_t *ptr= buf;
+    const uint8_t *ptr_end = buf + buf_size;
     int j;
 
     memset(frequency, 0, sizeof(frequency));
@@ -527,6 +543,8 @@ static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const
     for(;;){
         int i;
 
+        if (start <= end && ptr_end - ptr < end - start + 1 + 1)
+            return NULL;
         for(i=start; i<=end; i++){
             frequency[i]= *ptr++;
         }
@@ -599,10 +617,13 @@ static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length){
     const int height= f->avctx->height;
     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
     const int stride= f->current_picture.linesize[0]>>1;
+    const uint8_t *buf_end = buf + length;
 
     for(y=0; y<height; y+=16){
         for(x=0; x<width; x+=16){
             unsigned int color[4], bits;
+            if (buf_end - buf < 8)
+                return -1;
             memset(color, 0, sizeof(color));
 //warning following is purely guessed ...
             color[0]= bytestream_get_le16(&buf);
@@ -636,18 +657,23 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length){
     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
     const int stride= f->current_picture.linesize[0]>>1;
     const unsigned int bitstream_size= AV_RL32(buf);
-    const int token_count av_unused = AV_RL32(buf + bitstream_size + 8);
-    unsigned int prestream_size= 4*AV_RL32(buf + bitstream_size + 4);
-    const uint8_t *prestream= buf + bitstream_size + 12;
+    unsigned int prestream_size;
+    const uint8_t *prestream;
 
-    if(prestream_size + bitstream_size + 12 != length
-       || bitstream_size > (1<<26)
-       || prestream_size > (1<<26)){
+    if (bitstream_size > (1<<26) || length < bitstream_size + 12)
+        return -1;
+    prestream_size = 4*AV_RL32(buf + bitstream_size + 4);
+    prestream = buf + bitstream_size + 12;
+
+    if (prestream_size > (1<<26) ||
+        prestream_size != length - (bitstream_size + 12)){
         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
         return -1;
     }
 
-    prestream= read_huffman_tables(f, prestream);
+    prestream= read_huffman_tables(f, prestream, buf + length - prestream);
+    if (!prestream)
+        return -1;
 
     init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
 
@@ -684,6 +710,8 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame *p, temp;
     int i, frame_4cc, frame_size;
 
+    if (buf_size < 12)
+        return AVERROR_INVALIDDATA;
     frame_4cc= AV_RL32(buf);
     if(buf_size != AV_RL32(buf+4)+8 || buf_size < 20){
         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, AV_RL32(buf+4));
@@ -696,6 +724,9 @@ static int decode_frame(AVCodecContext *avctx,
         const int whole_size= AV_RL32(buf+16);
         CFrameBuffer *cfrm;
 
+        if (data_size < 0 || whole_size < 0)
+            return AVERROR_INVALIDDATA;
+
         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
             if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
@@ -712,6 +743,8 @@ static int decode_frame(AVCodecContext *avctx,
         }
         cfrm= &f->cfrm[i];
 
+        if (data_size > UINT_MAX -  cfrm->size - FF_INPUT_BUFFER_PADDING_SIZE)
+            return AVERROR_INVALIDDATA;
         cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
         if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");

From d1a5b53ededd091484949a33c6e9f97d4612602d Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 2 Oct 2011 16:06:37 +0200
Subject: [PATCH 168/315] h264: do not let invalid values in h->ref_count on
 ff_h264_decode_ref_pic_list_reordering() errors.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 2428b53f6d306d8d71dec34fa7b0af733d76cfac)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h264.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 3ea61330ec..c3771f2b68 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3940,8 +3940,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         fill_default_ref_list(h);
     }
 
-    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
+    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0) {
+        h->ref_count[1]= h->ref_count[0]= 0;
         return -1;
+    }
 
     if(h->slice_type_nos!=FF_I_TYPE){
         s->last_picture_ptr= &h->ref_list[0][0];

From ddbbe500b04a6c2a6551461843e6c94e5576fcbf Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 2 Oct 2011 16:06:38 +0200
Subject: [PATCH 169/315] h264: fix the check for invalid SPS:num_ref_frames.

This patch set the limit to 16.

For information, these previous commits:
41f7e2d11d2dca23842ee89d530ca9fa15cec9d8
5cbb0e70a0a2ee99eb3cb09e837b9a1f7355b9bc
assumed it was either 30 or 32.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit bcf881a6858760ecbd9ff4352a38813dc4232dd6)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index c3771f2b68..205f8018ec 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -7167,7 +7167,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
     }
 
     sps->ref_frame_count= get_ue_golomb_31(&s->gb);
-    if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
+    if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count > 16U){
         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
         goto fail;
     }

From 2eb5f77bc8ac0b533e3d305bf6dbe54186471465 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Tue, 4 Oct 2011 22:13:58 +0200
Subject: [PATCH 170/315] h264: do not let invalid values in h->ref_count after
 a decoder reset.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 0333d234b0355b375762447e93674e3fe3c5bff1)
(cherry picked from commit f74d1c6de7ef810544edae947db1eb1e2c7b6361)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/h264.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 205f8018ec..aaa8ad7d7d 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3743,6 +3743,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         free_tables(h);
         flush_dpb(s->avctx);
         MPV_common_end(s);
+        h->list_count = 0;
     }
     if (!s->context_initialized) {
         if(h != h0)

From 8d1fa1c97e1d11614489d2ea746be1a435563dd0 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 13 Sep 2011 18:53:18 -0400
Subject: [PATCH 171/315] mpc7: check output buffer size before decoding
 (cherry picked from commit c8b5c4d27409dfdcec80868686b173ba446c998b)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit b833859daa4eb8fe0ec9117859b21a734905b895)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mpc7.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 7077c96fc2..2858255152 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -164,7 +164,7 @@ static int mpc7_decode_frame(AVCodecContext * avctx,
     int i, ch, t;
     int mb = -1;
     Band *bands = c->bands;
-    int off;
+    int off, out_size;
     int bits_used, bits_avail;
 
     memset(bands, 0, sizeof(bands));
@@ -172,6 +172,12 @@ static int mpc7_decode_frame(AVCodecContext * avctx,
         av_log(avctx, AV_LOG_ERROR, "Too small buffer passed (%i bytes)\n", buf_size);
     }
 
+    out_size = (buf[1] ? c->lastframelen : MPC_FRAME_SIZE) * 4;
+    if (*data_size < out_size) {
+        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
+        return AVERROR(EINVAL);
+    }
+
     bits = av_malloc(((buf_size - 1) & ~3) + FF_INPUT_BUFFER_PADDING_SIZE);
     c->dsp.bswap_buf((uint32_t*)bits, (const uint32_t*)(buf + 4), (buf_size - 4) >> 2);
     init_get_bits(&gb, bits, (buf_size - 4)* 8);
@@ -248,7 +254,7 @@ static int mpc7_decode_frame(AVCodecContext * avctx,
         *data_size = 0;
         return buf_size;
     }
-    *data_size = (buf[1] ? c->lastframelen : MPC_FRAME_SIZE) * 4;
+    *data_size = out_size;
 
     return buf_size;
 }

From 58087a4e640eb2e1ae77de873f9c1aa37e9c8bbe Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 14 Sep 2011 11:16:42 -0400
Subject: [PATCH 172/315] mpc7: return error if packet is too small. (cherry
 picked from commit 8290d1f38b438f1b070de67645c8b4a42014c7ac)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 490617b6ffa13f8e49a196a752f927d5ebad6e2b)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mpc7.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 2858255152..34f76932b8 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -170,6 +170,7 @@ static int mpc7_decode_frame(AVCodecContext * avctx,
     memset(bands, 0, sizeof(bands));
     if(buf_size <= 4){
         av_log(avctx, AV_LOG_ERROR, "Too small buffer passed (%i bytes)\n", buf_size);
+        return AVERROR(EINVAL);
     }
 
     out_size = (buf[1] ? c->lastframelen : MPC_FRAME_SIZE) * 4;

From 23aaa82b1d30456339c09b3591fe78534839d4d0 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 8 Oct 2011 23:40:37 +0200
Subject: [PATCH 173/315] vqa: fix double free on corrupted streams

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit e3123856c79c36507772ada1bcda6cfe36a1e297)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/westwood.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavformat/westwood.c b/libavformat/westwood.c
index d2736accc5..7ca179717c 100644
--- a/libavformat/westwood.c
+++ b/libavformat/westwood.c
@@ -279,10 +279,8 @@ static int wsvqa_read_header(AVFormatContext *s,
     /* there are 0 or more chunks before the FINF chunk; iterate until
      * FINF has been skipped and the file will be ready to be demuxed */
     do {
-        if (get_buffer(pb, scratch, VQA_PREAMBLE_SIZE) != VQA_PREAMBLE_SIZE) {
-            av_free(st->codec->extradata);
+        if (get_buffer(pb, scratch, VQA_PREAMBLE_SIZE) != VQA_PREAMBLE_SIZE)
             return AVERROR(EIO);
-        }
         chunk_tag = AV_RB32(&scratch[0]);
         chunk_size = AV_RB32(&scratch[4]);
 

From e1a46eff7a1017e1d3afc5204b82e08a1ea0bf3d Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 8 Oct 2011 23:01:33 +0200
Subject: [PATCH 174/315] qtrle: check for invalid line offset

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit a4ed7c3fe9f99b89f86b65710d8855dc572f1a25)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 67c46b9b3027fdd9fd737e21a80d3326748b1c15)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qtrle.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index d535c38dd9..b66acd4220 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c
@@ -461,6 +461,8 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
         stream_ptr += 4;
         height = AV_RB16(&s->buf[stream_ptr]);
         stream_ptr += 4;
+        if (height > s->avctx->height - start_line)
+            goto done;
     } else {
         start_line = 0;
         height = s->avctx->height;

From c603cf51704714a3b9e1d3efa78bd4627d4b9e74 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 8 Oct 2011 23:40:36 +0200
Subject: [PATCH 175/315] qtrle: check for out of bound writes.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 7fb92be7e50ea4ba5712804326c6814ae02dd190)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit a65045915f5b4ec6da73df54d1914b320a861223)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qtrle.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index b66acd4220..d68b44f405 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c
@@ -127,6 +127,7 @@ static inline void qtrle_decode_2n4bpp(QtrleContext *s, int stream_ptr,
     while (lines_to_change--) {
         CHECK_STREAM_PTR(2);
         pixel_ptr = row_ptr + (num_pixels * (s->buf[stream_ptr++] - 1));
+        CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
         while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
             if (rle_code == 0) {
@@ -183,6 +184,7 @@ static void qtrle_decode_8bpp(QtrleContext *s, int stream_ptr, int row_ptr, int
     while (lines_to_change--) {
         CHECK_STREAM_PTR(2);
         pixel_ptr = row_ptr + (4 * (s->buf[stream_ptr++] - 1));
+        CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
         while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
             if (rle_code == 0) {
@@ -236,6 +238,7 @@ static void qtrle_decode_16bpp(QtrleContext *s, int stream_ptr, int row_ptr, int
     while (lines_to_change--) {
         CHECK_STREAM_PTR(2);
         pixel_ptr = row_ptr + (s->buf[stream_ptr++] - 1) * 2;
+        CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
         while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
             if (rle_code == 0) {
@@ -285,6 +288,7 @@ static void qtrle_decode_24bpp(QtrleContext *s, int stream_ptr, int row_ptr, int
     while (lines_to_change--) {
         CHECK_STREAM_PTR(2);
         pixel_ptr = row_ptr + (s->buf[stream_ptr++] - 1) * 3;
+        CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
         while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
             if (rle_code == 0) {
@@ -336,6 +340,7 @@ static void qtrle_decode_32bpp(QtrleContext *s, int stream_ptr, int row_ptr, int
     while (lines_to_change--) {
         CHECK_STREAM_PTR(2);
         pixel_ptr = row_ptr + (s->buf[stream_ptr++] - 1) * 4;
+        CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
         while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
             if (rle_code == 0) {

From 36e4be0a0adc18358a59043167a55da8eac16023 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 11 Oct 2011 22:03:19 +0200
Subject: [PATCH 176/315] mem: fix memalign hack av_realloc()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit fc11927890f38445a950b453d24928525da0e61a)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 5ae87280e219e843c71201c580780e8e30083559)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/mem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavutil/mem.c b/libavutil/mem.c
index 741450b53f..1ce0b778cc 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -113,7 +113,9 @@ void *av_realloc(void *ptr, unsigned int size)
     //FIXME this isn't aligned correctly, though it probably isn't needed
     if(!ptr) return av_malloc(size);
     diff= ((char*)ptr)[-1];
-    return (char*)realloc((char*)ptr - diff, size + diff) + diff;
+    ptr= realloc((char*)ptr - diff, size + diff);
+    if(ptr) ptr = (char*)ptr + diff;
+    return ptr;
 #else
     return realloc(ptr, size);
 #endif

From 8acc0546bbb0deacdf1e79f5486a0811840246fc Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sat, 15 Oct 2011 00:03:55 +0200
Subject: [PATCH 177/315] matroskadec: fix out of bounds write

Signed-off-by: Janne Grunau <janne-libav@jannau.net>
(cherry picked from commit 723229c11f1400e6a09c8a1c9c27193f376eb1d1)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit d51c7b4cbe022f6b3b026735dc7e29eb50bbf129)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/matroskadec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index d9ffec3727..0d75c5c13f 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1641,7 +1641,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
                         lace_size[n] = lace_size[n - 1] + snum;
                         total += lace_size[n];
                     }
-                    lace_size[n] = size - total;
+                    lace_size[laces - 1] = size - total;
                     break;
                 }
             }

From e124c3c298a7abe2ded2b90817915c5baf8ea1be Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 27 Oct 2011 14:31:53 +0200
Subject: [PATCH 178/315] resample: Fix overflow

Found-by: Jim Radford
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 6ae93d030476ddd7fa2ab4d9d2dd25df85725390)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/resample2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavcodec/resample2.c b/libavcodec/resample2.c
index ac9db73c8c..01478190a3 100644
--- a/libavcodec/resample2.c
+++ b/libavcodec/resample2.c
@@ -190,8 +190,10 @@ AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_size,
     memcpy(&c->filter_bank[c->filter_length*phase_count+1], c->filter_bank, (c->filter_length-1)*sizeof(FELEM));
     c->filter_bank[c->filter_length*phase_count]= c->filter_bank[c->filter_length - 1];
 
-    c->src_incr= out_rate;
-    c->ideal_dst_incr= c->dst_incr= in_rate * phase_count;
+    if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2))
+        return NULL;
+    c->ideal_dst_incr= c->dst_incr;
+
     c->index= -phase_count*((c->filter_length-1)/2);
 
     return c;

From d39cc3c092936896d787c29d7e215a273e22a57e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 27 Oct 2011 14:34:45 +0200
Subject: [PATCH 179/315] resample2: fix potential overflow

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit a39b5e8b323785695fb0e3c0f30bd9e24287db87)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/resample2.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavcodec/resample2.c b/libavcodec/resample2.c
index 01478190a3..19fed30a38 100644
--- a/libavcodec/resample2.c
+++ b/libavcodec/resample2.c
@@ -227,10 +227,9 @@ int av_resample(AVResampleContext *c, short *dst, short *src, int *consumed, int
             dst[dst_index] = src[index2>>32];
             index2 += incr;
         }
-        frac += dst_index * dst_incr_frac;
         index += dst_index * dst_incr;
-        index += frac / c->src_incr;
-        frac %= c->src_incr;
+        index += (frac + dst_index * (int64_t)dst_incr_frac) / c->src_incr;
+        frac   = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
   }else{
     for(dst_index=0; dst_index < dst_size; dst_index++){
         FELEM *filter= c->filter_bank + c->filter_length*(index & c->phase_mask);

From 8bd374858f860e73157768b102cebe96116e688d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 27 Oct 2011 15:26:45 +0200
Subject: [PATCH 180/315] resample: Fix array size

Found-by: Jim Radford
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 3e7db0a9ee758bf0570a141be1fea64f8d9c03db)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit edf3c5a3ebeee8df55c6a05f88a682091f10a364)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/resample.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/resample.c b/libavcodec/resample.c
index 829db76570..c40f7225c2 100644
--- a/libavcodec/resample.c
+++ b/libavcodec/resample.c
@@ -279,9 +279,9 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
     if (s->sample_fmt[1] != SAMPLE_FMT_S16) {
         output_bak = output;
 
-        if (!s->buffer_size[1] || s->buffer_size[1] < lenout) {
+        if (!s->buffer_size[1] || s->buffer_size[1] < 2*lenout) {
             av_free(s->buffer[1]);
-            s->buffer_size[1] = lenout;
+            s->buffer_size[1] = 2*lenout;
             s->buffer[1] = av_malloc(s->buffer_size[1]);
             if (!s->buffer[1]) {
                 av_log(s, AV_LOG_ERROR, "Could not allocate buffer\n");

From 16ea6af381906281c0db89ee0356a9553c41339d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 6 Nov 2011 20:57:55 +0100
Subject: [PATCH 181/315] 0.5: some updates.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 Changelog | 1 +
 RELEASE   | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Changelog b/Changelog
index 173cc00acf..526dacee47 100644
--- a/Changelog
+++ b/Changelog
@@ -9,6 +9,7 @@ version 0.5.5:
   (CVE-2011-3362, CVE-2011-3973, CVE-2011-3974)
 - Compilation fixes for gcc-4.6, testsuite now passes again
 - Detect and handle overreads in the MJPEG decoder.
+- multiple other security fixes.
 
 
diff --git a/RELEASE b/RELEASE
index 75099adfac..ff438bdd9d 100644
--- a/RELEASE
+++ b/RELEASE
@@ -140,7 +140,7 @@ are encouraged to update and share their patches against this branch.
 
 
-* 0.5.5 Nov 11, 2011
+* 0.5.5 Nov 6, 2011
 
 General notes
 -------------
@@ -148,7 +148,7 @@ General notes
 This maintenance-only release addresses several security issues that
 were brought to our attention. In detail, fixes for the MJPEG decoder,
 the CAVS decoder (CVE-2011-3362, CVE-2011-3973, CVE-2011-3974), and the
-Matroska decoder (MSVR11-011/CVE-2011-3504) have been
+Matroska decoder (MSVR11-011/CVE-2011-3504) and many others have been
 corrected. Additional, this release contains fixes for compilation with
 gcc-4.6. Distributors and system integrators are encouraged to update
 and share their patches against this branch.

From 0cd61bfa6d6ecf2ab57da8496858473158c4ea35 Mon Sep 17 00:00:00 2001
From: Thierry Foucu <tfoucu@gmail.com>
Date: Thu, 17 Nov 2011 09:39:52 -0800
Subject: [PATCH 182/315] vp6: Fix illegal read.

Found with Address Sanitizer

Signed-off-by: Alex Converse <alex.converse@gmail.com>
(cherry picked from commit e0966eb140b3569b3d6b5b5008961944ef229c06)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 8a63deab15ef41fd439be1b46d8dcb73669ccfc1)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vp6.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 5071903701..b89ff498da 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -427,7 +427,8 @@ static void vp6_parse_coeff(VP56Context *s)
         model1 = model->coeff_dccv[pt];
         model2 = model->coeff_dcct[pt][ctx];
 
-        for (coeff_idx=0; coeff_idx<64; ) {
+        coeff_idx = 0;
+        for (;;) {
             if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) {
                 /* parse a coeff */
                 if (vp56_rac_get_prob(c, model2[2])) {
@@ -468,8 +469,10 @@ static void vp6_parse_coeff(VP56Context *s)
                             run += vp56_rac_get_prob(c, model3[i+8]) << i;
                 }
             }
-
-            cg = vp6_coeff_groups[coeff_idx+=run];
+            coeff_idx += run;
+            if (coeff_idx >= 64)
+                break;
+            cg = vp6_coeff_groups[coeff_idx];
             model1 = model2 = model->coeff_ract[pt][ct][cg];
         }
 

From 37cc48861d7898c27d4b95cf890e01ba44ddae02 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 17 Nov 2011 10:06:14 -0800
Subject: [PATCH 183/315] vp5: Fix illegal read.

Found with Address Sanitizer
(cherry picked from commit bb4b0ad83b13c3af57675e80163f3f333adef96f)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f62fa1ce9f12e4a43b41401a7416c6fa8da579c9)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vp5.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index dfa2a1b094..c32f4f050e 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -199,7 +199,8 @@ static void vp5_parse_coeff(VP56Context *s)
         model1 = model->coeff_dccv[pt];
         model2 = model->coeff_dcct[pt][ctx];
 
-        for (coeff_idx=0; coeff_idx<64; ) {
+        coeff_idx = 0;
+        for (;;) {
             if (vp56_rac_get_prob(c, model2[0])) {
                 if (vp56_rac_get_prob(c, model2[2])) {
                     if (vp56_rac_get_prob(c, model2[3])) {
@@ -236,8 +237,11 @@ static void vp5_parse_coeff(VP56Context *s)
                 ct = 0;
                 s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 0;
             }
+            coeff_idx++;
+            if (coeff_idx >= 64)
+                break;
 
-            cg = vp5_coeff_groups[++coeff_idx];
+            cg = vp5_coeff_groups[coeff_idx];
             ctx = s->coeff_ctx[vp56_b6to4[b]][coeff_idx];
             model1 = model->coeff_ract[pt][ct][cg];
             model2 = cg > 2 ? model1 : model->coeff_acct[pt][ct][cg][ctx];

From 037b1142cde54e64c8de2a5fd79c259a36b5e244 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 16 Nov 2011 03:31:25 +0100
Subject: [PATCH 184/315] wma: Check channel number before init. Fixes
 Ticket240

Based on patch by ami_stuff
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 20431a9982b9bd2c475042d919890a941ad70c71)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/wmadec.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index e7936ce49f..088d510a0b 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -105,6 +105,11 @@ static int wma_decode_init(AVCodecContext * avctx)
     s->use_bit_reservoir = flags2 & 0x0002;
     s->use_variable_block_len = flags2 & 0x0004;
 
+    if(avctx->channels > MAX_CHANNELS){
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of channels (%d)\n", avctx->channels);
+        return -1;
+    }
+
     if(ff_wma_init(avctx, flags2)<0)
         return -1;
 

From 4f209fe9606a4fb73f2f10d9106a8e990dac3b8d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 16 Nov 2011 17:21:42 +0100
Subject: [PATCH 185/315] cinepak: check strip_size

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit cea0c82d9b9771dfa2ac729c13c0d9e03ea352a7)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 211a107208ee636da81d2a89592181e2d78a0c8c)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/cinepak.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index c248f00ec9..f6d457dd97 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c
@@ -366,6 +366,8 @@ static int cinepak_decode (CinepakContext *s)
         s->strips[i].x2 = s->avctx->width;
 
         strip_size = AV_RB24 (&s->data[1]) - 12;
+        if(strip_size < 0)
+            return -1;
         s->data   += 12;
         strip_size = ((s->data + strip_size) > eod) ? (eod - s->data) : strip_size;
 

From 2c8ac664561a1843f299295443720ce83105c024 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 18 Nov 2011 17:56:24 +0100
Subject: [PATCH 186/315] qdm2dec: check remaining input bits in the mainloop
 of qdm2_fft_decode_tones() This is necessary but likely not sufficient to
 prevent out of array reads.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 14db3af4f26dad8e6ddf2147e96ccc710952ad4d)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 8120a1d9bd4bcc4434b4f588f50c9d81aa8ad0e0)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qdm2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 69be061350..3aef7a36e2 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1351,7 +1351,7 @@ static void qdm2_fft_decode_tones (QDM2Context *q, int duration, GetBitContext *
     local_int_10 = 1 << (q->group_order - duration - 1);
     offset = 1;
 
-    while (1) {
+    while (get_bits_left(gb)>0) {
         if (q->superblocktype_2_3) {
             while ((n = qdm2_get_vlc(gb, &vlc_tab_fft_tone_offset[local_int_8], 1, 2)) < 2) {
                 offset = 1;

From 4b0f8aed13efc53b62b5fb2eabdc55cd4c0967ae Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 18 Nov 2011 17:48:31 +0100
Subject: [PATCH 187/315] qdm2dec: fix buffer overflow. Fixes NGS00144

This also adds a few lines of code from master that are needed for this fix.

Thanks to Phillip for suggestions to improve the patch.
Found-by: Phillip Langlois
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit a6a61a6d1d4da219a6fe29250e2a6b28f9d05524)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qdm2.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 3aef7a36e2..f9fed4f71c 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -77,6 +77,7 @@ do { \
 #define SAMPLES_NEEDED_2(why) \
      av_log (NULL,AV_LOG_INFO,"This file triggers some missing code. Please contact the developers.\nPosition: %s\n",why);
 
+#define QDM2_MAX_FRAME_SIZE 512
 
 typedef int8_t sb_int8_array[2][30][64];
 
@@ -169,7 +170,7 @@ typedef struct {
     /// I/O data
     const uint8_t *compressed_data;
     int compressed_size;
-    float output_buffer[1024];
+    float output_buffer[QDM2_MAX_FRAME_SIZE * MPA_MAX_CHANNELS * 2];
 
     /// Synthesis filter
     DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512*2]);
@@ -1845,7 +1846,8 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     // something like max decodable tones
     s->group_order = av_log2(s->group_size) + 1;
     s->frame_size = s->group_size / 16; // 16 iterations per super block
-    if (s->frame_size > FF_ARRAY_ELEMS(s->output_buffer) / 2)
+
+    if (s->frame_size > QDM2_MAX_FRAME_SIZE)
         return AVERROR_INVALIDDATA;
 
     s->sub_sampling = s->fft_order - 7;
@@ -1915,6 +1917,9 @@ static void qdm2_decode (QDM2Context *q, const uint8_t *in, int16_t *out)
     int ch, i;
     const int frame_size = (q->frame_size * q->channels);
 
+    if((unsigned)frame_size > FF_ARRAY_ELEMS(q->output_buffer)/2)
+        return -1;
+
     /* select input buffer */
     q->compressed_data = in;
     q->compressed_size = q->checksum_size;

From cc2580e6e98c6a9687aae6d62341d188fa31efd5 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 18 Nov 2011 19:10:21 +0100
Subject: [PATCH 188/315] svq1dec: call avcodec_set_dimensions() after
 dimensions changed. Fixes NGS00148

Found-by: Phillip Langlois
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 4931c8f0f10bf8dedcf626104a6b85bfefadc6f2)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 661ee45f8881bb551eb403472e60c38a7c2818aa)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/svq1dec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index 7fef10bb99..7c4e5c960c 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -676,6 +676,7 @@ static int svq1_decode_frame(AVCodecContext *avctx,
 #endif
     return result;
   }
+  avcodec_set_dimensions(avctx, s->width, s->height);
 
   //FIXME this avoids some confusion for "B frames" without 2 references
   //this should be removed after libavcodec can handle more flexible picture types & ordering

From b769df8ff248364d1e4c084121a9eb32cf8befad Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 21 Nov 2011 22:22:04 +0100
Subject: [PATCH 189/315] update for 0.5.6

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 RELEASE | 10 ++++++++++
 VERSION |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/RELEASE b/RELEASE
index ff438bdd9d..ffd6fa81d6 100644
--- a/RELEASE
+++ b/RELEASE
@@ -153,3 +153,13 @@ corrected. Additional, this release contains fixes for compilation with
 gcc-4.6. Distributors and system integrators are encouraged to update
 and share their patches against this branch.
 
+
+
+* 0.5.6 Nov 21, 2011
+
+General notes
+-------------
+
+This maintenance-only release addresses several security issues that
+were brought to our attention.
+
diff --git a/VERSION b/VERSION
index d1d899fa33..b49b25336d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.5
+0.5.6

From 30ee6c1995cdc2ccc9cdc79cc51172c141fd24bf Mon Sep 17 00:00:00 2001
From: Baptiste Coudurier <baptiste.coudurier@gmail.com>
Date: Fri, 19 Nov 2010 06:52:30 +0000
Subject: [PATCH 190/315] Fix qdm2 decoder packet handling to match the api

Originally committed as revision 25767 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit b26c1a8b7ed1a199b19f92bb5d62c61f1c149215)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/qdm2.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index a3373a16d9..95c8c97131 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1906,7 +1906,7 @@ static av_cold int qdm2_decode_close(AVCodecContext *avctx)
 }
 
 
-static void qdm2_decode (QDM2Context *q, const uint8_t *in, int16_t *out)
+static int qdm2_decode (QDM2Context *q, const uint8_t *in, int16_t *out)
 {
     int ch, i;
     const int frame_size = (q->frame_size * q->channels);
@@ -1942,7 +1942,7 @@ static void qdm2_decode (QDM2Context *q, const uint8_t *in, int16_t *out)
 
         if (!q->has_errors && q->sub_packet_list_C[0].packet != NULL) {
             SAMPLES_NEEDED_2("has errors, and C list is not empty")
-            return;
+            return -1;
         }
     }
 
@@ -1963,6 +1963,8 @@ static void qdm2_decode (QDM2Context *q, const uint8_t *in, int16_t *out)
 
         out[i] = value;
     }
+
+    return 0;
 }
 
 
@@ -1971,25 +1973,26 @@ static int qdm2_decode_frame(AVCodecContext *avctx,
             const uint8_t *buf, int buf_size)
 {
     QDM2Context *s = avctx->priv_data;
+    int16_t *out = data;
+    int i;
 
     if(!buf)
         return 0;
     if(buf_size < s->checksum_size)
         return -1;
 
-    *data_size = s->channels * s->frame_size * sizeof(int16_t);
-
     av_log(avctx, AV_LOG_DEBUG, "decode(%d): %p[%d] -> %p[%d]\n",
        buf_size, buf, s->checksum_size, data, *data_size);
 
-    qdm2_decode(s, buf, data);
-
-    // reading only when next superblock found
-    if (s->sub_packet == 0) {
-        return s->checksum_size;
+    for (i = 0; i < 16; i++) {
+        if (qdm2_decode(s, buf, out) < 0)
+            return -1;
+        out += s->channels * s->frame_size;
     }
 
-    return 0;
+    *data_size = (uint8_t*)out - (uint8_t*)data;
+
+    return buf_size;
 }
 
 AVCodec qdm2_decoder =

From 60eebf5c1208b844248e50487643286c761760d5 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 14 Sep 2011 13:57:04 -0400
Subject: [PATCH 191/315] qdm2: check output buffer size before decoding

(cherry picked from commit 7d49f79f1cd47783a963a757a6563b9cac29db62)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 73472053516f82b7d273a3d42c583f894077a191)

Conflicts:

	libavcodec/qdm2.c
(cherry picked from commit cfb9b47a1ecdc9e88e6561aa213d98245ee70267)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/qdm2.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 95c8c97131..023fe99ad1 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1974,13 +1974,20 @@ static int qdm2_decode_frame(AVCodecContext *avctx,
 {
     QDM2Context *s = avctx->priv_data;
     int16_t *out = data;
-    int i;
+    int i, out_size;
 
     if(!buf)
         return 0;
     if(buf_size < s->checksum_size)
         return -1;
 
+    out_size = 16 * s->channels * s->frame_size *
+               av_get_bits_per_sample_format(avctx->sample_fmt)/8;
+    if (*data_size < out_size) {
+        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
+        return AVERROR(EINVAL);
+    }
+
     av_log(avctx, AV_LOG_DEBUG, "decode(%d): %p[%d] -> %p[%d]\n",
        buf_size, buf, s->checksum_size, data, *data_size);
 
@@ -1990,7 +1997,7 @@ static int qdm2_decode_frame(AVCodecContext *avctx,
         out += s->channels * s->frame_size;
     }
 
-    *data_size = (uint8_t*)out - (uint8_t*)data;
+    *data_size = out_size;
 
     return buf_size;
 }

From 1a5309540663a844080e9eec09477ea909a50efe Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 1 Oct 2011 00:45:05 +0200
Subject: [PATCH 192/315] Check for out of bound writes in the QDM2 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
(cherry picked from commit 291d74a46d32183653db07818c7b3407fd50a288)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit a31ccacb1a9b2abc0e140a812fb0ffca6f7c2591)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/qdm2.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 023fe99ad1..0a21bc8b81 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -77,6 +77,7 @@ do { \
 #define SAMPLES_NEEDED_2(why) \
      av_log (NULL,AV_LOG_INFO,"This file triggers some missing code. Please contact the developers.\nPosition: %s\n",why);
 
+#define QDM2_MAX_FRAME_SIZE 512
 
 typedef int8_t sb_int8_array[2][30][64];
 
@@ -169,7 +170,7 @@ typedef struct {
     /// I/O data
     const uint8_t *compressed_data;
     int compressed_size;
-    float output_buffer[1024];
+    float output_buffer[QDM2_MAX_FRAME_SIZE * 2];
 
     /// Synthesis filter
     DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512*2]);
@@ -1821,6 +1822,8 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
 
     avctx->channels = s->nb_channels = s->channels = AV_RB32(extradata);
     extradata += 4;
+    if (s->channels > MPA_MAX_CHANNELS)
+        return AVERROR_INVALIDDATA;
 
     avctx->sample_rate = AV_RB32(extradata);
     extradata += 4;
@@ -1843,6 +1846,8 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     // something like max decodable tones
     s->group_order = av_log2(s->group_size) + 1;
     s->frame_size = s->group_size / 16; // 16 iterations per super block
+    if (s->frame_size > QDM2_MAX_FRAME_SIZE)
+        return AVERROR_INVALIDDATA;
 
     s->sub_sampling = s->fft_order - 7;
     s->frequency_range = 255 / (1 << (2 - s->sub_sampling));

From 8abf1d882eee75e9fe2b31f387afaf5ad339f4b3 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sat, 1 Oct 2011 00:45:04 +0200
Subject: [PATCH 193/315] Fix out of bound reads in the QDM2 decoder.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
(cherry picked from commit 5a19acb17ceb71657b0eec51dac651953520e5c8)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 0d93d5c4614fafea74bdac681673f5b32eb49063)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/qdm2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 0a21bc8b81..0a4840242f 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1378,6 +1378,8 @@ static void qdm2_fft_decode_tones (QDM2Context *q, int duration, GetBitContext *
             return;
 
         local_int_14 = (offset >> local_int_8);
+        if (local_int_14 >= FF_ARRAY_ELEMS(fft_level_index_table))
+            return;
 
         if (q->nb_channels > 1) {
             channel = get_bits1(gb);

From 7739947671f6b8b9f0f56132bc6b389baad404fd Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 3 Nov 2011 15:55:52 -0700
Subject: [PATCH 194/315] vp6: Fix illegal read.

(cherry picked from commit 2a6eb06254df79e96b3d791b6b89b2534ced3119)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 67a7ed623b678a84c992dd7bf3e3d0329f83621b)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 8d68083298e2481669de4db0b7b86c915119df6d)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp6.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 5071903701..9197b4609b 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -365,7 +365,7 @@ static void vp6_parse_coeff_huffman(VP56Context *s)
         if (b > 3) pt = 1;
         vlc_coeff = &s->dccv_vlc[pt];
 
-        for (coeff_idx=0; coeff_idx<64; ) {
+        for (coeff_idx = 0;;) {
             int run = 1;
             if (coeff_idx<2 && s->nb_null[coeff_idx][pt]) {
                 s->nb_null[coeff_idx][pt]--;
@@ -400,6 +400,8 @@ static void vp6_parse_coeff_huffman(VP56Context *s)
                 }
             }
             coeff_idx+=run;
+            if (coeff_idx >= 64)
+                break;
             cg = FFMIN(vp6_coeff_groups[coeff_idx], 3);
             vlc_coeff = &s->ract_vlc[pt][ct][cg];
         }

From 771ceb19f293df9a447d38820f4def8f58aa0b2e Mon Sep 17 00:00:00 2001
From: Thierry Foucu <tfoucu@gmail.com>
Date: Thu, 17 Nov 2011 09:39:52 -0800
Subject: [PATCH 195/315] vp6: Fix illegal read.

Found with Address Sanitizer

Signed-off-by: Alex Converse <alex.converse@gmail.com>
(cherry picked from commit e0966eb140b3569b3d6b5b5008961944ef229c06)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit ba4b08b78918f399f9c9524750b26e904d146078)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 94aacaf5083313378c6105bd71db04ce8f62c058)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp6.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 9197b4609b..d16a13eba0 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -429,7 +429,8 @@ static void vp6_parse_coeff(VP56Context *s)
         model1 = model->coeff_dccv[pt];
         model2 = model->coeff_dcct[pt][ctx];
 
-        for (coeff_idx=0; coeff_idx<64; ) {
+        coeff_idx = 0;
+        for (;;) {
             if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) {
                 /* parse a coeff */
                 if (vp56_rac_get_prob(c, model2[2])) {
@@ -470,8 +471,10 @@ static void vp6_parse_coeff(VP56Context *s)
                             run += vp56_rac_get_prob(c, model3[i+8]) << i;
                 }
             }
-
-            cg = vp6_coeff_groups[coeff_idx+=run];
+            coeff_idx += run;
+            if (coeff_idx >= 64)
+                break;
+            cg = vp6_coeff_groups[coeff_idx];
             model1 = model2 = model->coeff_ract[pt][ct][cg];
         }
 

From 9767ea7aa77a331f73268758af2e1858222f5915 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Fri, 23 Sep 2011 22:36:11 +0200
Subject: [PATCH 196/315] vp6: Reset the internal state when aborting key
 frames header parsing

It prevents leaving the state only half initialized.

Signed-off-by: Janne Grunau <janne-libav@jannau.net>
(cherry picked from commit a72cad0a6c05aa74940101e937cb3dc602d7d67b)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit c76505e0dee0890e39636ddebd2707ab3ea5b8de)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit e28bb18fdc894dfdc1befa9f5e748ccb649a8c76)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp6.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index d16a13eba0..288227b174 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -136,8 +136,11 @@ static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
     if (coeff_offset) {
         buf      += coeff_offset;
         buf_size -= coeff_offset;
-        if (buf_size < 0)
+        if (buf_size < 0) {
+            if (s->framep[VP56_FRAME_CURRENT]->key_frame)
+                avcodec_set_dimensions(s->avctx, 0, 0);
             return 0;
+        }
         if (s->use_huffman) {
             s->parse_coeff = vp6_parse_coeff_huffman;
             init_get_bits(&s->gb, buf, buf_size<<3);

From 280590e3380d8649e5b191c653b0f974626c8eeb Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Wed, 3 Mar 2010 17:24:32 +0000
Subject: [PATCH 197/315] Plug some memory leaks in the VP6 decoder

Originally committed as revision 22172 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 0a41faa9a77dc83d8d933e99f1ba902ecd146e79)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp56.c | 11 +++++++++++
 libavcodec/vp6.c  |  1 +
 2 files changed, 12 insertions(+)

diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index ad11b5289a..c09dbeb2f8 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -685,6 +685,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
 av_cold int vp56_free(AVCodecContext *avctx)
 {
     VP56Context *s = avctx->priv_data;
+    int pt;
 
     av_free(s->above_blocks);
     av_free(s->macroblocks);
@@ -695,5 +696,15 @@ av_cold int vp56_free(AVCodecContext *avctx)
         avctx->release_buffer(avctx, s->framep[VP56_FRAME_GOLDEN2]);
     if (s->framep[VP56_FRAME_PREVIOUS]->data[0])
         avctx->release_buffer(avctx, s->framep[VP56_FRAME_PREVIOUS]);
+
+    for (pt=0; pt < 2; pt++) {
+        int ct, cg;
+        free_vlc(&s->dccv_vlc[pt]);
+        free_vlc(&s->runv_vlc[pt]);
+        for (ct=0; ct<3; ct++)
+            for (cg = 0; cg < 6; cg++)
+                free_vlc(&s->ract_vlc[pt][ct][cg]);
+    }
+
     return 0;
 }
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 288227b174..0d1b12c4bf 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -230,6 +230,7 @@ static void vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
         nodes[map[2*i+1]].count = b + !b;
     }
 
+    free_vlc(vlc);
     /* then build the huffman tree accodring to probabilities */
     ff_huff_build_tree(s->avctx, vlc, size, nodes, vp6_huff_cmp,
                        FF_HUFFMAN_FLAG_HNODE_FIRST);

From 185abfb2187aae040171b00d32e6b04bdec7acf3 Mon Sep 17 00:00:00 2001
From: Dustin Brody <libav@parsoma.net>
Date: Tue, 16 Aug 2011 16:46:34 -0400
Subject: [PATCH 198/315] vp6: partially propagate huffman tree building errors
 during coeff model parsing and fix misspelling

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
(cherry picked from commit f913eeea43078b3b9052efd8d8d29e7b29b39208)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 7367cbec1b8cf0cbb49707fb0fdfded8ec397b0d)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 201fcfb89482c6f73d6b679a294aac8da9612bbd)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp6.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 0d1b12c4bf..d9e9711cca 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -215,8 +215,8 @@ static int vp6_huff_cmp(const void *va, const void *vb)
     return (a->count - b->count)*16 + (b->sym - a->sym);
 }
 
-static void vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
-                                const uint8_t *map, unsigned size, VLC *vlc)
+static int vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
+                               const uint8_t *map, unsigned size, VLC *vlc)
 {
     Node nodes[2*size], *tmp = &nodes[size];
     int a, b, i;
@@ -231,9 +231,9 @@ static void vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
     }
 
     free_vlc(vlc);
-    /* then build the huffman tree accodring to probabilities */
-    ff_huff_build_tree(s->avctx, vlc, size, nodes, vp6_huff_cmp,
-                       FF_HUFFMAN_FLAG_HNODE_FIRST);
+    /* then build the huffman tree according to probabilities */
+    return ff_huff_build_tree(s->avctx, vlc, size, nodes, vp6_huff_cmp,
+                              FF_HUFFMAN_FLAG_HNODE_FIRST);
 }
 
 static void vp6_parse_coeff_models(VP56Context *s)

From 4a1c3df592a2151f037dcf3f9f6d695da85c4511 Mon Sep 17 00:00:00 2001
From: Laurent Aimar <fenrir@videolan.org>
Date: Sun, 11 Sep 2011 19:17:45 +0200
Subject: [PATCH 199/315] vmd: fix segfaults on corrupted streams

Signed-off-by: Janne Grunau <janne-libav@jannau.net>
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 494cfacdb9ba3f0549e37f76b3a2f86a7aeeac3c)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit b99366faef3a1ed4a34c9b37107f2c8c24702813)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vmdav.c | 104 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 80 insertions(+), 24 deletions(-)

diff --git a/libavcodec/vmdav.c b/libavcodec/vmdav.c
index 1921c81ca2..60dc7f958c 100644
--- a/libavcodec/vmdav.c
+++ b/libavcodec/vmdav.c
@@ -73,9 +73,11 @@ typedef struct VmdVideoContext {
 #define QUEUE_SIZE 0x1000
 #define QUEUE_MASK 0x0FFF
 
-static void lz_unpack(const unsigned char *src, unsigned char *dest, int dest_len)
+static void lz_unpack(const unsigned char *src, int src_len,
+                      unsigned char *dest, int dest_len)
 {
     const unsigned char *s;
+    unsigned int s_len;
     unsigned char *d;
     unsigned char *d_end;
     unsigned char queue[QUEUE_SIZE];
@@ -88,13 +90,16 @@ static void lz_unpack(const unsigned char *src, unsigned char *dest, int dest_le
     unsigned int i, j;
 
     s = src;
+    s_len = src_len;
     d = dest;
     d_end = d + dest_len;
     dataleft = AV_RL32(s);
-    s += 4;
+    s += 4; s_len -= 4;
     memset(queue, 0x20, QUEUE_SIZE);
+    if (s_len < 4)
+        return;
     if (AV_RL32(s) == 0x56781234) {
-        s += 4;
+        s += 4; s_len -= 4;
         qpos = 0x111;
         speclen = 0xF + 3;
     } else {
@@ -102,32 +107,41 @@ static void lz_unpack(const unsigned char *src, unsigned char *dest, int dest_le
         speclen = 100;  /* no speclen */
     }
 
-    while (dataleft > 0) {
-        tag = *s++;
+    while (dataleft > 0 && s_len > 0) {
+        tag = *s++; s_len--;
         if ((tag == 0xFF) && (dataleft > 8)) {
-            if (d + 8 > d_end)
+            if (d + 8 > d_end || s_len < 8)
                 return;
             for (i = 0; i < 8; i++) {
                 queue[qpos++] = *d++ = *s++;
                 qpos &= QUEUE_MASK;
             }
+            s_len -= 8;
             dataleft -= 8;
         } else {
             for (i = 0; i < 8; i++) {
                 if (dataleft == 0)
                     break;
                 if (tag & 0x01) {
-                    if (d + 1 > d_end)
+                    if (d + 1 > d_end || s_len < 1)
                         return;
                     queue[qpos++] = *d++ = *s++;
                     qpos &= QUEUE_MASK;
                     dataleft--;
+                    s_len--;
                 } else {
+                    if (s_len < 2)
+                        return;
                     chainofs = *s++;
                     chainofs |= ((*s & 0xF0) << 4);
                     chainlen = (*s++ & 0x0F) + 3;
-                    if (chainlen == speclen)
+                    s_len -= 2;
+                    if (chainlen == speclen) {
+                        if (s_len < 1)
+                            return;
                         chainlen = *s++ + 0xF + 3;
+                        s_len--;
+                    }
                     if (d + chainlen > d_end)
                         return;
                     for (j = 0; j < chainlen; j++) {
@@ -144,7 +158,7 @@ static void lz_unpack(const unsigned char *src, unsigned char *dest, int dest_le
 }
 
 static int rle_unpack(const unsigned char *src, unsigned char *dest,
-    int src_len, int dest_len)
+    int src_count, int src_size, int dest_len)
 {
     const unsigned char *ps;
     unsigned char *pd;
@@ -153,31 +167,40 @@ static int rle_unpack(const unsigned char *src, unsigned char *dest,
 
     ps = src;
     pd = dest;
-    if (src_len & 1)
+    if (src_count & 1) {
+        if (src_size < 1)
+            return 0;
         *pd++ = *ps++;
+        src_size--;
+    }
 
-    src_len >>= 1;
+    src_count >>= 1;
     i = 0;
     do {
+        if (src_size < 1)
+            break;
         l = *ps++;
+        src_size--;
         if (l & 0x80) {
             l = (l & 0x7F) * 2;
-            if (pd + l > dest_end)
+            if (pd + l > dest_end || src_size < l)
                 return ps - src;
             memcpy(pd, ps, l);
             ps += l;
+            src_size -= l;
             pd += l;
         } else {
-            if (pd + i > dest_end)
+            if (pd + i > dest_end || src_size < 2)
                 return ps - src;
             for (i = 0; i < l; i++) {
                 *pd++ = ps[0];
                 *pd++ = ps[1];
             }
             ps += 2;
+            src_size -= 2;
         }
         i += l;
-    } while (i < src_len);
+    } while (i < src_count);
 
     return ps - src;
 }
@@ -192,6 +215,7 @@ static void vmd_decode(VmdVideoContext *s)
     const unsigned char *p = s->buf + 16;
 
     const unsigned char *pb;
+    unsigned int pb_size;
     unsigned char meth;
     unsigned char *dp;   /* pointer to current frame */
     unsigned char *pp;   /* pointer to previous frame */
@@ -206,6 +230,16 @@ static void vmd_decode(VmdVideoContext *s)
     frame_y = AV_RL16(&s->buf[8]);
     frame_width = AV_RL16(&s->buf[10]) - frame_x + 1;
     frame_height = AV_RL16(&s->buf[12]) - frame_y + 1;
+    if (frame_x < 0 || frame_width < 0 ||
+        frame_x >= s->avctx->width ||
+        frame_width > s->avctx->width ||
+        frame_x + frame_width > s->avctx->width)
+        return;
+    if (frame_y < 0 || frame_height < 0 ||
+        frame_y >= s->avctx->height ||
+        frame_height > s->avctx->height ||
+        frame_y + frame_height > s->avctx->height)
+        return;
 
     if ((frame_width == s->avctx->width && frame_height == s->avctx->height) &&
         (frame_x || frame_y)) {
@@ -218,8 +252,9 @@ static void vmd_decode(VmdVideoContext *s)
 
     /* if only a certain region will be updated, copy the entire previous
      * frame before the decode */
-    if (frame_x || frame_y || (frame_width != s->avctx->width) ||
-        (frame_height != s->avctx->height)) {
+    if (s->prev_frame.data[0] &&
+        (frame_x || frame_y || (frame_width != s->avctx->width) ||
+        (frame_height != s->avctx->height))) {
 
         memcpy(s->frame.data[0], s->prev_frame.data[0],
             s->avctx->height * s->frame.linesize[0]);
@@ -237,14 +272,19 @@ static void vmd_decode(VmdVideoContext *s)
         }
         s->size -= (256 * 3 + 2);
     }
-    if (s->size >= 0) {
+    if (s->size > 0) {
         /* originally UnpackFrame in VAG's code */
         pb = p;
-        meth = *pb++;
+        pb_size = s->buf + s->size - pb;
+        if (pb_size < 1)
+            return;
+        meth = *pb++; pb_size--;
         if (meth & 0x80) {
-            lz_unpack(pb, s->unpack_buffer, s->unpack_buffer_size);
+            lz_unpack(pb, pb_size,
+                      s->unpack_buffer, s->unpack_buffer_size);
             meth &= 0x7F;
             pb = s->unpack_buffer;
+            pb_size = s->unpack_buffer_size;
         }
 
         dp = &s->frame.data[0][frame_y * s->frame.linesize[0] + frame_x];
@@ -255,17 +295,21 @@ static void vmd_decode(VmdVideoContext *s)
             for (i = 0; i < frame_height; i++) {
                 ofs = 0;
                 do {
+                    if (pb_size < 1)
+                        return;
                     len = *pb++;
+                    pb_size--;
                     if (len & 0x80) {
                         len = (len & 0x7F) + 1;
-                        if (ofs + len > frame_width)
+                        if (ofs + len > frame_width || pb_size < len)
                             return;
                         memcpy(&dp[ofs], pb, len);
                         pb += len;
+                        pb_size -= len;
                         ofs += len;
                     } else {
                         /* interframe pixel copy */
-                        if (ofs + len + 1 > frame_width)
+                        if (ofs + len + 1 > frame_width || !s->prev_frame.data[0])
                             return;
                         memcpy(&dp[ofs], &pp[ofs], len + 1);
                         ofs += len + 1;
@@ -283,8 +327,11 @@ static void vmd_decode(VmdVideoContext *s)
 
         case 2:
             for (i = 0; i < frame_height; i++) {
+                if (pb_size < frame_width)
+                    return;
                 memcpy(dp, pb, frame_width);
                 pb += frame_width;
+                pb_size -= frame_width;
                 dp += s->frame.linesize[0];
                 pp += s->prev_frame.linesize[0];
             }
@@ -294,18 +341,27 @@ static void vmd_decode(VmdVideoContext *s)
             for (i = 0; i < frame_height; i++) {
                 ofs = 0;
                 do {
+                    if (pb_size < 1)
+                        return;
                     len = *pb++;
+                    pb_size--;
                     if (len & 0x80) {
                         len = (len & 0x7F) + 1;
+                        if (pb_size < 1)
+                            return;
                         if (*pb++ == 0xFF)
-                            len = rle_unpack(pb, &dp[ofs], len, frame_width - ofs);
-                        else
+                            len = rle_unpack(pb, &dp[ofs], len, pb_size, frame_width - ofs);
+                        else {
+                            if (pb_size < len)
+                                return;
                             memcpy(&dp[ofs], pb, len);
+                        }
                         pb += len;
+                        pb_size -= 1 + len;
                         ofs += len;
                     } else {
                         /* interframe pixel copy */
-                        if (ofs + len + 1 > frame_width)
+                        if (ofs + len + 1 > frame_width || !s->prev_frame.data[0])
                             return;
                         memcpy(&dp[ofs], &pp[ofs], len + 1);
                         ofs += len + 1;

From 110aff4b2451bf8768523f6c7f054d2c04cd2a26 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 18 Nov 2011 19:10:21 +0100
Subject: [PATCH 200/315] svq1dec: call avcodec_set_dimensions() after
 dimensions changed.

Fixes NGS00148, CVE-2011-4579

Found-by: Phillip Langlois
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

(cherry picked from commit 6e24b9488e67849a28e64a8056e05f83cf439229)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 0eca0da06e40b73af495cc05fbcfaa030fcf78ea)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 8ddc0b491d3c9c11c1e3d638fda51b4b604d32f4)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/svq1dec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index 7fef10bb99..7c4e5c960c 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -676,6 +676,7 @@ static int svq1_decode_frame(AVCodecContext *avctx,
 #endif
     return result;
   }
+  avcodec_set_dimensions(avctx, s->width, s->height);
 
   //FIXME this avoids some confusion for "B frames" without 2 references
   //this should be removed after libavcodec can handle more flexible picture types & ordering

From 23f228a0d0ad3049a99dbf586e4139d86b6297be Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 24 Dec 2011 16:32:06 +0100
Subject: [PATCH 201/315] Bump version number for 0.5.6 release.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index d1d899fa33..b49b25336d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.5
+0.5.6

From d0688fdd3101d900a3e3aac4e36bf7ef1eae01ad Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 25 Dec 2011 09:55:45 +0100
Subject: [PATCH 202/315] Release notes and changelog for 0.5.6

---
 Changelog | 16 ++++++++++++++++
 RELEASE   | 17 +++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/Changelog b/Changelog
index 173cc00acf..1d9eb79284 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,22 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.6:
+- svq1dec: call avcodec_set_dimensions() after dimensions changed. (NGS00148, CVE-2011-4579)
+- vmd: fix segfaults on corrupted streams (CVE-2011-4364)
+- commits related to CVE-2011-4353:
+  - vp6: partially propagate huffman tree building errors during coeff model parsing and fix misspelling
+  - Plug some memory leaks in the VP6 decoder
+  - vp6: Reset the internal state when aborting key frames header parsing
+  - vp6: Fix illegal read.
+  - vp6: Fix illegal read.
+  - Fix out of bound reads in the QDM2 decoder.
+- commits related to CVE-2011-4351:
+  - Check for out of bound writes in the QDM2 decoder.
+  - qdm2: check output buffer size before decoding
+  - Fix qdm2 decoder packet handling to match the api
+
+
 version 0.5.5:
 
 - Fix memory (re)allocation in matroskadec.c (MSVR11-011/CVE-2011-3504)
diff --git a/RELEASE b/RELEASE
index 75099adfac..364a327934 100644
--- a/RELEASE
+++ b/RELEASE
@@ -153,3 +153,20 @@ corrected. Additional, this release contains fixes for compilation with
 gcc-4.6. Distributors and system integrators are encouraged to update
 and share their patches against this branch.
 
+
+
+* 0.5.6 Dec 25, 2011
+
+General notes
+-------------
+
+This maintenance-only release addresses several security issues that
+were brought to our attention. In detail, it features fixes for the
+QDM2 decoder (CVE-2011-4351), a DoS in the VP5/VP6 decoders
+(CVE-2011-4353), a buffer overflow in the Sierra VMD decoder
+(CVE-2011-4364), and a safety fix in the SVQ1 decoder (CVE-2011-4579).
+CVE-2011-4352, a bug in the VP3 decoder, is not known to affect this
+release.
+
+Distributors and system integrators are encouraged to update and share
+their patches against this branch.

From e75056bc549fe13bb6d29a8a67a6a2babf060417 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 25 Dec 2011 21:43:56 +0100
Subject: [PATCH 203/315] Update for 0.5.7

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index b49b25336d..d3532a107e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.6
+0.5.7

From 3eb6983dbcfafc639ad3b9e34a5b4f8ff736310b Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 28 Oct 2011 23:50:04 -0700
Subject: [PATCH 204/315] vp3: fix oob read for negative tokens and memleaks on
 error.

(cherry picked from commit 8370e426e42f2e4b9d14a1fb8107ecfe5163ce7f)

Fixes: #189
Chromium-Bug: 101172,100465
CVE-2011-3892

Removed the parts that are related to multi-threading, which is not
included before 0.7.

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit c624935554332f8921a15265b8720f0c7b3c8cc2)

Conflicts:

	libavcodec/vp3.c
(cherry picked from commit c9c7db0af2a0fc14764a07f0e61cebf11238e3c2)

Conflicts:

	libavcodec/vp3.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp3.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 429c4f98a4..69248d6775 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -1011,12 +1011,12 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
             /* decode a VLC into a token */
             token = get_vlc2(gb, table->table, 5, 3);
             /* use the token to get a zero run, a coefficient, and an eob run */
-            if (token <= 6) {
+            if ((unsigned) token <= 6U) {
                 eob_run = eob_run_base[token];
                 if (eob_run_get_bits[token])
                     eob_run += get_bits(gb, eob_run_get_bits[token]);
                 coeff = zero_run = 0;
-            } else {
+            } else if (token >= 0) {
                 bits_to_get = coeff_get_bits[token];
                 if (!bits_to_get)
                     coeff = coeff_tables[token][0];
@@ -1026,6 +1026,10 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                 zero_run = zero_run_base[token];
                 if (zero_run_get_bits[token])
                     zero_run += get_bits(gb, zero_run_get_bits[token]);
+            } else {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "Invalid token %d\n", token);
+                return -1;
             }
         }
 
@@ -1071,6 +1075,8 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
     /* unpack the C plane DC coefficients */
     residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
         s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+    if (residual_eob_run < 0)
+        return residual_eob_run;
 
     /* fetch the AC table indexes */
     ac_y_table = get_bits(gb, 4);
@@ -1080,36 +1086,52 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
     for (i = 1; i <= 5; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
     }
 
     /* unpack the group 2 AC coefficients (coeffs 6-14) */
     for (i = 6; i <= 14; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
     }
 
     /* unpack the group 3 AC coefficients (coeffs 15-27) */
     for (i = 15; i <= 27; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
     }
 
     /* unpack the group 4 AC coefficients (coeffs 28-63) */
     for (i = 28; i <= 63; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+        if (residual_eob_run < 0)
+            return residual_eob_run;
     }
 
     return 0;

From 665421f3b1a626610206410e5dc12e5e236b92a2 Mon Sep 17 00:00:00 2001
From: Chris Evans <cevans@chromium.org>
Date: Thu, 5 Jan 2012 21:25:41 +0100
Subject: [PATCH 205/315] vorbis: Avoid some out-of-bounds reads

Fixes Bug: #190
Chromium Bug: #100543
Related to CVE-2011-3893

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 57cd6d709565e84e84385f8f2a9641ca3fa718be)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 4a94678f1be4b7d47f862e9523ca3358255da5d4)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 6d6254ba9fbb22260939c06db1faed5bbd295ad4)

Conflicts:

	libavcodec/vorbis.c
---
 libavcodec/vorbis.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index dbc409f8d7..13e7e65b0f 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -146,13 +146,13 @@ void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) {
     }
 }
 
-static void render_line(int x0, int y0, int x1, int y1, float * buf) {
+static void render_line(int x0, uint8_t y0, int x1, int y1, float * buf) {
     int dy = y1 - y0;
     int adx = x1 - x0;
     int base = dy / adx;
     int ady = FFABS(dy) - FFABS(base) * adx;
     int x = x0;
-    int y = y0;
+    uint8_t y = y0;
     int err = 0;
     int sy = dy<0 ? -1 : 1;
     buf[x] = ff_vorbis_floor1_inverse_db_table[y];
@@ -168,7 +168,8 @@ static void render_line(int x0, int y0, int x1, int y1, float * buf) {
 }
 
 void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) {
-    int lx, ly, i;
+    int lx, i;
+    uint8_t ly;
     lx = 0;
     ly = y_list[0] * multiplier;
     for (i = 1; i < values; i++) {

From 7ee536e87a569174775dabdd959a9b12c1d2ac3d Mon Sep 17 00:00:00 2001
From: Chris Evans <cevans@chromium.org>
Date: Thu, 5 Jan 2012 21:19:30 +0100
Subject: [PATCH 206/315] matroskadec: Fix a bug where a pointer was cached to
 an array that might later move due to a realloc()

Fixes bug #190
Chromium bug #100492
related to CVE-2011-3893

Signed-off-by: Reinhard Tartler <siretart@tauware.de>

(cherry-picked from commit faaec4676cb4c7a2303d50df66c6290bc96a7657)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 1f625431e2bb9564760fba3ab8077ae07ce7c7a1)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 90a4a467477be8c292daa08a9516ee78ca0d517b)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/matroskadec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index d9ffec3727..e16cd1efd4 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1063,13 +1063,13 @@ static void matroska_convert_tags(AVFormatContext *s)
 static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
 {
     EbmlList *seekhead_list = &matroska->seekhead;
-    MatroskaSeekhead *seekhead = seekhead_list->elem;
     uint32_t level_up = matroska->level_up;
     int64_t before_pos = url_ftell(matroska->ctx->pb);
     MatroskaLevel level;
     int i;
 
     for (i=0; i<seekhead_list->nb_elem; i++) {
+        MatroskaSeekhead *seekhead = seekhead_list->elem;
         int64_t offset = seekhead[i].pos + matroska->segment_start;
 
         if (seekhead[i].pos <= before_pos

From efd453d82d5cd6c064644022a4153fe4bd45f8b7 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 5 Jan 2012 21:40:18 +0100
Subject: [PATCH 207/315] vorbisdec: Fix decoding bug with channel handling

Fixes Bug: #191
Chromium Bug: #101458
CVE-2011-3895

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit e6d527ff729e42d80e4756cab779ff4ad693631b)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 97f23c72a3815739ab28e297ce60f943349f6939)

Conflicts:

	libavcodec/vorbis_dec.c
(cherry picked from commit 42f0a6696889ba275aa2087b57fa99f7a97033a0)

Conflicts:

	libavcodec/vorbis_dec.c
---
 libavcodec/vorbis_dec.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 5b8b056393..ebaa59da1c 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -654,7 +654,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc){
         res_setup->partition_size=get_bits(gb, 24)+1;
         /* Validations to prevent a buffer overflow later. */
         if (res_setup->begin>res_setup->end
-        || res_setup->end>vc->blocksize[1]/(res_setup->type==2?1:2)
+        || res_setup->end > (res_setup->type == 2 ? vc->avccontext->channels : 1) * vc->blocksize[1] / 2
         || (res_setup->end-res_setup->begin)/res_setup->partition_size>V_MAX_PARTITIONS) {
             av_log(vc->avccontext, AV_LOG_ERROR, "partition out of bounds: type, begin, end, size, blocksize: %d, %d, %d, %d, %d\n", res_setup->type, res_setup->begin, res_setup->end, res_setup->partition_size, vc->blocksize[1]/2);
             return 1;
@@ -1505,6 +1505,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
     uint_fast8_t res_num=0;
     int_fast16_t retlen=0;
     float fadd_bias = vc->add_bias;
+    int ch_left = vc->audio_channels;
 
     if (get_bits1(gb)) {
         av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n");
@@ -1583,9 +1584,14 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
             }
         }
         residue=&vc->residues[mapping->submap_residue[i]];
+        if (ch_left < ch) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "Too many channels in vorbis_floor_decode.\n");
+            return -1;
+        }
         vorbis_residue_decode(vc, residue, ch, do_not_decode, ch_res_ptr, blocksize/2);
 
         ch_res_ptr+=ch*blocksize/2;
+        ch_left -= ch;
     }
 
 // Inverse coupling

From 6b01bcebb90c450b49002254aeaa978a5bbd27bb Mon Sep 17 00:00:00 2001
From: Chris Evans <cevans@chromium.org>
Date: Thu, 5 Jan 2012 21:25:41 +0100
Subject: [PATCH 208/315] vorbis: An additional defense in the Vorbis codec.

Fixes Bug: #190
Chromium Bug: #100543
Related to CVE-2011-3893

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit afb2aa537954db537d54358997b68f46561fd5a7)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit b0283ccb9e8945ce9e56f7c6ba0c676e7179d7a3)

Conflicts:

	libavcodec/vorbis_dec.c
(cherry picked from commit a5e0afe3c936220a793db0cdae04bb228f1904e0)

Conflicts:

	libavcodec/vorbis_dec.c
---
 libavcodec/vorbis_dec.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index ebaa59da1c..1321b08440 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -1293,7 +1293,7 @@ static int vorbis_floor1_decode(vorbis_context *vc, vorbis_floor_data *vfu, floa
 
 // Read and decode residue
 
-static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen, int vr_type) {
+static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen, unsigned ch_left, int vr_type) {
     GetBitContext *gb=&vc->gb;
     uint_fast8_t c_p_c=vc->codebooks[vr->classbook].dimensions;
     uint_fast16_t n_to_read=vr->end-vr->begin;
@@ -1303,6 +1303,7 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, v
     uint_fast8_t ch_used;
     uint_fast8_t i,j,l;
     uint_fast16_t k;
+    unsigned max_output = (ch - 1) * vlen;
 
     if (vr_type==2) {
         for(j=1;j<ch;++j) {
@@ -1310,8 +1311,15 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, v
         }
         if (do_not_decode[0]) return 0;
         ch_used=1;
+        max_output += vr->end / ch;
     } else {
         ch_used=ch;
+        max_output += vr->end;
+    }
+
+    if (max_output > ch_left * vlen) {
+        av_log(vc->avccontext, AV_LOG_ERROR, "Insufficient output buffer\n");
+        return -1;
     }
 
     AV_DEBUG(" residue type 0/1/2 decode begin, ch: %d  cpc %d  \n", ch, c_p_c);
@@ -1435,14 +1443,14 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, v
     return 0;
 }
 
-static inline int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen)
+static inline int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen, unsigned ch_left)
 {
     if (vr->type==2)
-        return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 2);
-    else if (vr->type==1)
-        return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 1);
-    else if (vr->type==0)
-        return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 0);
+        return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, ch_left, 2);
+    else if (vr->type == 1)
+        return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, ch_left, 1);
+    else if (vr->type == 0)
+        return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, ch_left, 0);
     else {
         av_log(vc->avccontext, AV_LOG_ERROR, " Invalid residue type while residue decode?! \n");
         return 1;
@@ -1505,7 +1513,8 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
     uint_fast8_t res_num=0;
     int_fast16_t retlen=0;
     float fadd_bias = vc->add_bias;
-    int ch_left = vc->audio_channels;
+    unsigned ch_left = vc->audio_channels;
+    unsigned vlen;
 
     if (get_bits1(gb)) {
         av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n");
@@ -1528,12 +1537,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
 
     blockflag=vc->modes[mode_number].blockflag;
     blocksize=vc->blocksize[blockflag];
+    vlen = blocksize / 2;
     if (blockflag) {
         skip_bits(gb, 2); // previous_window, next_window
     }
 
-    memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
-    memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
+    memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*vlen); //FIXME can this be removed ?
+    memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*vlen); //FIXME can this be removed ?
 
 // Decode floor
 
@@ -1553,7 +1563,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
             return -1;
         }
         no_residue[i] = ret;
-        ch_floor_ptr += blocksize / 2;
+        ch_floor_ptr += vlen;
     }
 
 // Nonzero vector propagate
@@ -1570,6 +1580,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
     for(i=0;i<mapping->submaps;++i) {
         vorbis_residue *residue;
         uint_fast8_t ch=0;
+        int ret;
 
         for(j=0;j<vc->audio_channels;++j) {
             if ((mapping->submaps==1) || (i==mapping->mux[j])) {
@@ -1588,9 +1599,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
             av_log(vc->avccontext, AV_LOG_ERROR, "Too many channels in vorbis_floor_decode.\n");
             return -1;
         }
-        vorbis_residue_decode(vc, residue, ch, do_not_decode, ch_res_ptr, blocksize/2);
+        if (ch) {
+            ret = vorbis_residue_decode(vc, residue, ch, do_not_decode, ch_res_ptr, vlen, ch_left);
+            if (ret < 0)
+                return ret;
+        }
 
-        ch_res_ptr+=ch*blocksize/2;
+        ch_res_ptr += ch * vlen;
         ch_left -= ch;
     }
 

From ec0124203c9bf12b926b87544ced79c8b78c75ce Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 10 Jan 2012 21:23:27 +0100
Subject: [PATCH 209/315] Bump version number for 0.5.7 release.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index b49b25336d..d3532a107e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.6
+0.5.7

From 15df4428d264287ec1577f92296b178f86cbe14d Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Tue, 10 Jan 2012 22:22:05 +0100
Subject: [PATCH 210/315] Release notes and changelog for 0.5.7

---
 Changelog | 10 ++++++++++
 RELEASE   | 17 +++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/Changelog b/Changelog
index 1d9eb79284..89903eb1ad 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,16 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.7:
+- vorbis: An additional defense in the Vorbis codec. (CVE-2011-3895)
+- vorbisdec: Fix decoding bug with channel handling.
+- matroskadec: Fix a bug where a pointer was cached to an array that might
+  later move due to a realloc(). (CVE-2011-3893)
+- vorbis: Avoid some out-of-bounds reads. (CVE-2011-3893)
+- vp3: fix oob read for negative tokens and memleaks on error. (CVE-2011-3892)
+- vp3: fix streams with non-zero last coefficient.
+
+
 version 0.5.6:
 - svq1dec: call avcodec_set_dimensions() after dimensions changed. (NGS00148, CVE-2011-4579)
 - vmd: fix segfaults on corruped streams (CVE-2011-4364)
diff --git a/RELEASE b/RELEASE
index 364a327934..e68779b5b2 100644
--- a/RELEASE
+++ b/RELEASE
@@ -170,3 +170,20 @@ release.
 
 Distributors and system integrators are encouraged to update and share
 their patches against this branch.
+
+
+
+* 0.5.7 Jan 11, 2012
+
+General notes
+-------------
+
+This mostly maintenance-only release addresses a number of
+bugs such as security and compilation issues that have been brought to
+our attention. Among other (rather minor) fixes, this release features
+fixes for the VP3 decoder (CVE-2011-3892), vorbis decoder, and matroska
+demuxer (CVE-2011-3893 and CVE-2011-3895).
+
+Distributors and system integrators are encouraged
+to update and share their patches against this branch.  For a full list
+of changes please see the Changelog file.

From f13de3c653168f9d9a0bf8653b175035a5d9230b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 17 Dec 2011 03:18:58 +0100
Subject: [PATCH 211/315] atrac3: Fix crash in tonal component decoding. Fixes
 Ticket780 Bug Found by: cosminamironesei

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 9af6abdc17deb95c9b1f1d9242ba49b8b5e0b016)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/atrac3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c
index 81d25ec204..22dfdfc426 100644
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -454,6 +454,8 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
 
             for (k=0; k<coded_components; k++) {
                 sfIndx = get_bits(gb,6);
+                if(component_count>=64)
+                    return AVERROR_INVALIDDATA;
                 pComponent[component_count].pos = j * 64 + (get_bits(gb,6));
                 max_coded_values = 1024 - pComponent[component_count].pos;
                 coded_values = coded_values_per_component + 1;

From 8e101086ebbf73f6ef0756ace3e1b42ecbb1eff2 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 12 Jan 2012 22:19:09 +0100
Subject: [PATCH 212/315] update for 0.5.8

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 RELEASE | 2 +-
 VERSION | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/RELEASE b/RELEASE
index dd684b485d..a49bbfc592 100644
--- a/RELEASE
+++ b/RELEASE
@@ -183,7 +183,7 @@ their patches against this branch.
 
 
-* 0.5.7 Jan 11, 2012
+* 0.5.8 Jan 12, 2012
 
 General notes
 -------------
diff --git a/VERSION b/VERSION
index d3532a107e..659914ae94 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.7
+0.5.8

From 2e693be7e9b1a70360dd6849a58fae74b16afba1 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 31 Mar 2012 07:52:42 +0200
Subject: [PATCH 213/315] id3v2: fix skipping extended header in id3v2.4

In v2.4, the length includes the length field itself.
(cherry picked from commit ddb4431208745ea270dce8fce4cba999f0ed4303)

Conflicts:

	libavformat/id3v2.c

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/mp3.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/libavformat/mp3.c b/libavformat/mp3.c
index fa383d6512..ca414082ea 100644
--- a/libavformat/mp3.c
+++ b/libavformat/mp3.c
@@ -246,8 +246,17 @@ static void id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t fl
         goto error;
     }
 
-    if(isv34 && flags & 0x40) /* Extended header present, just skip over it */
-        url_fskip(s->pb, id3v2_get_size(s->pb, 4));
+    if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */
+        int extlen = id3v2_get_size(s->pb, 4);
+        if (version == 4)
+            extlen -= 4;     // in v2.4 the length includes the length field we just read
+
+        if (extlen < 0) {
+            reason = "invalid extended header length";
+            goto error;
+        }
+        url_fskip(s->pb, extlen);
+    }
 
     while(len >= taghdrlen) {
         if(isv34) {

From bde4b660637c9f08eef51697c54417037a9eeb2f Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 Jan 2012 22:20:26 +0100
Subject: [PATCH 214/315] nsvdec: Fix use of uninitialized streams.

Fixes CVE-2011-3940 (Out of bounds read resulting in out of bounds write)

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 5c011706bc752d34bc6ada31d7df2ca0c9af7c6b)

Signed-off-by: Alex Converse <alex.converse@gmail.com>
(cherry picked from commit 6a89b41d9780325ba6d89a37f2aeb925aa68e6a3)
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 65beb8c1173906b0541442713cb29e8ba44c47ef)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 1edf848a81464afd514afbbbcb97b471d334e14a)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/nsvdec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 719337c898..d592617a87 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -586,12 +586,12 @@ null_chunk_retry:
     }
 
     /* map back streams to v,a */
-    if (s->streams[0])
+    if (s->nb_streams > 0)
         st[s->streams[0]->id] = s->streams[0];
-    if (s->streams[1])
+    if (s->nb_streams > 1)
         st[s->streams[1]->id] = s->streams[1];
 
-    if (vsize/* && st[NSV_ST_VIDEO]*/) {
+    if (vsize && st[NSV_ST_VIDEO]) {
         nst = st[NSV_ST_VIDEO]->priv_data;
         pkt = &nsv->ahead[NSV_ST_VIDEO];
         av_get_packet(pb, pkt, vsize);
@@ -606,7 +606,7 @@ null_chunk_retry:
     if(st[NSV_ST_VIDEO])
         ((NSVStream*)st[NSV_ST_VIDEO]->priv_data)->frame_offset++;
 
-    if (asize/*st[NSV_ST_AUDIO]*/) {
+    if (asize && st[NSV_ST_AUDIO]) {
         nst = st[NSV_ST_AUDIO]->priv_data;
         pkt = &nsv->ahead[NSV_ST_AUDIO];
         /* read raw audio specific header on the first audio chunk... */

From 056c909d9df7704c8e5bbaab9fdab5e7bc969e0b Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 26 Jan 2012 17:21:46 -0800
Subject: [PATCH 215/315] nsvdec: Be more careful with av_malloc().

Check results for av_malloc() and fix an overflow in one call.

Related to CVE-2011-3940.

Based in part on work from Michael Niedermayer.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
(cherry picked from commit 8fd8a48263ff1437f9d02d7e78dc63efb9b5ed3a)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit be524c186b50337db64d34a5726dfe3e8ea94f09)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 87007519c81c37d8a3de424de3db14078ae84333)

Conflicts:

	libavformat/nsvdec.c
---
 libavformat/nsvdec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index d592617a87..9e5f38d426 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -317,7 +317,9 @@ static int nsv_parse_NSVf_header(AVFormatContext *s, AVFormatParameters *ap)
         char *token, *value;
         char quote;
 
-        p = strings = av_mallocz(strings_size + 1);
+        p = strings = av_mallocz((size_t)strings_size + 1);
+        if (!p)
+            return AVERROR(ENOMEM);
         endp = strings + strings_size;
         get_buffer(pb, strings, strings_size);
         while (p < endp) {
@@ -351,6 +353,8 @@ static int nsv_parse_NSVf_header(AVFormatContext *s, AVFormatParameters *ap)
         if((unsigned)table_entries >= UINT_MAX / sizeof(uint32_t))
             return -1;
         nsv->nsvf_index_data = av_malloc(table_entries * sizeof(uint32_t));
+        if (!nsv->nsvf_index_data)
+            return AVERROR(ENOMEM);
 #warning "FIXME: Byteswap buffer as needed"
         get_buffer(pb, (unsigned char *)nsv->nsvf_index_data, table_entries * sizeof(uint32_t));
     }

From ec4979e16e119e9ccb42b111008e74b9bc667428 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 26 Jan 2012 17:23:09 -0800
Subject: [PATCH 216/315] nsvdec: Propagate errors

Related to CVE-2011-3940.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
(cherry picked from commit c898431ca5ef2a997fe9388b650f658fb60783e5)

Conflicts:

	libavformat/nsvdec.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 0100c4b1b0736e0f5b3c98f9b0ab8acbef574888)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 3253dd2b420583a7f10afa87e47b9cb73e950e2a)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/nsvdec.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 9e5f38d426..25b3d6c4ae 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -511,11 +511,16 @@ static int nsv_read_header(AVFormatContext *s, AVFormatParameters *ap)
     for (i = 0; i < NSV_MAX_RESYNC_TRIES; i++) {
         if (nsv_resync(s) < 0)
             return -1;
-        if (nsv->state == NSV_FOUND_NSVF)
+        if (nsv->state == NSV_FOUND_NSVF) {
             err = nsv_parse_NSVf_header(s, ap);
+            if (err < 0)
+                return err;
+        }
             /* we need the first NSVs also... */
         if (nsv->state == NSV_FOUND_NSVS) {
             err = nsv_parse_NSVs_header(s, ap);
+            if (err < 0)
+                return err;
             break; /* we just want the first one */
         }
     }

From 479869c499a28a0c19dfc355d6ffcd1d8c02720e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 Jan 2012 17:48:23 +0100
Subject: [PATCH 217/315] dv: check stype

dv: check stype

Fixes part1 of CVE-2011-3929
Possibly fixes part of CVE-2011-3936

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Reviewed-by: Roman Shaposhnik <roman@shaposhnik.org>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Alex Converse <alex.converse@gmail.com>
(cherry picked from commit 635bcfccd439480003b74a665b5aa7c872c1ad6b)
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit bb737d381f6d6413899a0697f426fb082eac66fc)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 38421f27b3899a930552750fe1e0dffd45b71b8e)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/dv.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavformat/dv.c b/libavformat/dv.c
index 820c3b5cec..c650b5b52e 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -202,6 +202,12 @@ static int dv_extract_audio_info(DVDemuxContext* c, uint8_t* frame)
     stype = (as_pack[3] & 0x1f);      /* 0 - 2CH, 2 - 4CH, 3 - 8CH */
     quant =  as_pack[4] & 0x07;       /* 0 - 16bit linear, 1 - 12bit nonlinear */
 
+    if (stype > 3) {
+        av_log(c->fctx, AV_LOG_ERROR, "stype %d is invalid\n", stype);
+        c->ach = 0;
+        return 0;
+    }
+
     /* note: ach counts PAIRS of channels (i.e. stereo channels) */
     ach = ((int[4]){  1,  0,  2,  4})[stype];
     if (ach == 1 && quant && freq == 2)

From c4e8c99507b0b4ad384bea061afb6025ca868174 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 24 Jan 2012 17:51:40 +0100
Subject: [PATCH 218/315] dv: Fix null pointer dereference due to ach=0

dv: Fix null pointer dereference due to ach=0

Fixes part2 of CVE-2011-3929

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Reviewed-by: Roman Shaposhnik <roman@shaposhnik.org>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Alex Converse <alex.converse@gmail.com>
(cherry picked from commit 5a396bb3a66a61a68b80f2369d0249729bf85e04)
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 44e182d41e3a73548f3f5e8445ec428d3846e6d6)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit b46141b0d1d7efb74dad172b7c1b52413441592f)

Conflicts:

	libavformat/dv.c
---
 libavformat/dv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavformat/dv.c b/libavformat/dv.c
index c650b5b52e..54ed81854f 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -341,7 +341,8 @@ int dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
        c->audio_pkt[i].pts  = c->abytes * 30000*8 / c->ast[i]->codec->bit_rate;
        ppcm[i] = c->audio_buf[i];
     }
-    dv_extract_audio(buf, ppcm, c->sys);
+    if (c->ach)
+        dv_extract_audio(buf, ppcm, c->sys);
     c->abytes += size;
 
     /* We work with 720p frames split in half, thus even frames have

From 5a92aa378d066369c6d9a82192c274ae4b8997f0 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 26 Jan 2012 15:08:26 -0800
Subject: [PATCH 219/315] dv: Fix small stack overread related to CVE-2011-3929
 and CVE-2011-3936.

Found with asan.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Alex Converse <alex.converse@gmail.com>
(cherry picked from commit 2d1c0dea5f6b91bec7f5fa53ec050913d851e366)
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 00fa6ffe1a0b252d6a81815e51f125225cd0b97a)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit a8f4db0acd9b588ba33e3b8c0c21feea5916cfd1)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/dv.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/libavformat/dv.c b/libavformat/dv.c
index 54ed81854f..256dcd4312 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -125,10 +125,14 @@ static int dv_extract_audio(uint8_t* frame, uint8_t* ppcm[4],
     /* We work with 720p frames split in half, thus even frames have
      * channels 0,1 and odd 2,3. */
     ipcm = (sys->height == 720 && !(frame[1] & 0x0C)) ? 2 : 0;
-    pcm  = ppcm[ipcm++];
 
     /* for each DIF channel */
     for (chan = 0; chan < sys->n_difchan; chan++) {
+        /* next stereo channel (50Mbps and 100Mbps only) */
+        pcm = ppcm[ipcm++];
+        if (!pcm)
+            break;
+
         /* for each DIF segment */
         for (i = 0; i < sys->difseg_size; i++) {
             frame += 6 * 80; /* skip DIF segment header */
@@ -176,11 +180,6 @@ static int dv_extract_audio(uint8_t* frame, uint8_t* ppcm[4],
                 frame += 16 * 80; /* 15 Video DIFs + 1 Audio DIF */
             }
         }
-
-        /* next stereo channel (50Mbps and 100Mbps only) */
-        pcm = ppcm[ipcm++];
-        if (!pcm)
-            break;
     }
 
     return size;

From 8bb3ba55417f6bd95a6085270d9b61e39ee94645 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 17 Dec 2011 03:18:58 +0100
Subject: [PATCH 220/315] atrac3: Fix crash in tonal component decoding.

Add a check to avoid writing past the end of the channel_unit.components[]
array.

Bug Found by: cosminamironesei
Fixes CVE-2012-0853
CC: libav-stable@libav.org

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
(cherry picked from commit c509f4f74713b035a06f79cb4d00e708f5226bc5)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit f43b6e2b1ed47a1254a5d44c700a7fad5e9784be)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit f728ad26f0ec87650d2986a892785c0e2b97d161)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 224025d852dcc42f752c0922fef7121808d1e42f)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/atrac3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c
index 81d25ec204..9205f35314 100644
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -454,6 +454,8 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
 
             for (k=0; k<coded_components; k++) {
                 sfIndx = get_bits(gb,6);
+                if (component_count >= 64)
+                    return AVERROR_INVALIDDATA;
                 pComponent[component_count].pos = j * 64 + (get_bits(gb,6));
                 max_coded_values = 1024 - pComponent[component_count].pos;
                 coded_values = coded_values_per_component + 1;

From b2ac7e585e53ef6c99eef09f1b6fce373fb05125 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 25 Jan 2012 13:39:24 -0800
Subject: [PATCH 221/315] mjpegbdec: Fix overflow in SOS.

Based in part on a fix from Michael Niedermayer <michaelni@gmx.at>

Fixes CVE-2011-3947

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
(cherry picked from commit b57d262412204e54a7ef8fa1b23ff4dcede622e5)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 083a8a00373b12dc06b8ae4c49eec61fb5e55f4b)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 6ae95a0b93e8df15fe5f364535a7214be0817736)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 6ca010f20965ef71d97a53e871edae2eb9c05a5f)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/mjpegbdec.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c
index 62b29e0623..f19a87ff96 100644
--- a/libavcodec/mjpegbdec.c
+++ b/libavcodec/mjpegbdec.c
@@ -49,6 +49,9 @@ read_header:
     s->restart_count = 0;
     s->mjpb_skiptosod = 0;
 
+    if (buf_end - buf_ptr >= 1 << 28)
+        return AVERROR_INVALIDDATA;
+
     init_get_bits(&hgb, buf_ptr, /*buf_size*/(buf_end - buf_ptr)*8);
 
     skip_bits(&hgb, 32); /* reserved zeros */
@@ -99,8 +102,8 @@ read_header:
     av_log(avctx, AV_LOG_DEBUG, "sod offs: 0x%x\n", sod_offs);
     if (sos_offs)
     {
-//        init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
-        init_get_bits(&s->gb, buf_ptr+sos_offs, field_size*8);
+        init_get_bits(&s->gb, buf_ptr + sos_offs,
+                      8 * FFMIN(field_size, buf_end - buf_ptr - sos_offs));
         s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
         s->start_code = SOS;
         ff_mjpeg_decode_sos(s);

From 6c9b404dba6f29e44ff9ce385c84759e73082cd2 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 2 May 2012 12:08:03 -0700
Subject: [PATCH 222/315] motionpixels: Clip YUV values after applying a
 gradient.

Prevents illegal reads on truncated and malformed input.

CC: libav-stable@libav.org
(cherry picked from commit b5da848facd41169283d7bfe568b83bdfa7fc42e)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit aaa6a666774eb02c351c84e80622a5c69e9b642e)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 50073e2395522b6e2b8698ff0dd06ffaf8cbf8ce)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 2134e7f6e88959513ba1713ad6fd7a7c8d5a0f41)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/motionpixels.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c
index f69dcf95fe..349a02d0b1 100644
--- a/libavcodec/motionpixels.c
+++ b/libavcodec/motionpixels.c
@@ -239,10 +239,13 @@ static void mp_decode_line(MotionPixelsContext *mp, GetBitContext *gb, int y)
             p = mp_get_yuv_from_rgb(mp, x - 1, y);
         } else {
             p.y += mp_gradient(mp, 0, mp_get_vlc(mp, gb));
+            p.y = av_clip(p.y, 0, 31);
             if ((x & 3) == 0) {
                 if ((y & 3) == 0) {
                     p.v += mp_gradient(mp, 1, mp_get_vlc(mp, gb));
+                    p.v = av_clip(p.v, -32, 31);
                     p.u += mp_gradient(mp, 2, mp_get_vlc(mp, gb));
+                    p.u = av_clip(p.u, -32, 31);
                     mp->hpt[((y / 4) * mp->avctx->width + x) / 4] = p;
                 } else {
                     p.v = mp->hpt[((y / 4) * mp->avctx->width + x) / 4].v;
@@ -266,9 +269,12 @@ static void mp_decode_frame_helper(MotionPixelsContext *mp, GetBitContext *gb)
             p = mp_get_yuv_from_rgb(mp, 0, y);
         } else {
             p.y += mp_gradient(mp, 0, mp_get_vlc(mp, gb));
+            p.y = av_clip(p.y, 0, 31);
             if ((y & 3) == 0) {
                 p.v += mp_gradient(mp, 1, mp_get_vlc(mp, gb));
+                p.v = av_clip(p.v, -32, 31);
                 p.u += mp_gradient(mp, 2, mp_get_vlc(mp, gb));
+                p.u = av_clip(p.u, -32, 31);
             }
             mp->vpt[y] = p;
             mp_set_rgb_from_yuv(mp, 0, y, &p);

From 468cc41d6d7bba97f67e9a6c8386ca98cde41190 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 23 Apr 2012 13:16:33 +0100
Subject: [PATCH 223/315] vqavideo: return error if image size is not a
 multiple of block size

The decoder assumes in various places that the image size
is a multiple of the block size, and there is no obvious
way to support odd sizes.  Bailing out early if the header
specifies a bad size avoids various errors later on.

Fixes CVE-2012-0947.

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit 58b2e0f0f2fc96c1158e04f8aba95cbe6157a1a3)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit d5207e2af81580dd5e6277b354c8b459c3624f26)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit c71c77e56fcc6d469d45e1c8ce04aa053124d3f8)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit c90da45d5a7a4045dbf22fba52c63ef55d207269)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vqavideo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c
index 00df736d3b..f34a63173b 100644
--- a/libavcodec/vqavideo.c
+++ b/libavcodec/vqavideo.c
@@ -163,6 +163,12 @@ static av_cold int vqa_decode_init(AVCodecContext *avctx)
         return -1;
     }
 
+    if (s->width  & (s->vector_width  - 1) ||
+        s->height & (s->vector_height - 1)) {
+        av_log(avctx, AV_LOG_ERROR, "Image size not multiple of block size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* allocate codebooks */
     s->codebook_size = MAX_CODEBOOK_SIZE;
     s->codebook = av_malloc(s->codebook_size);

From 79fbcd9f0342823414793bdf09adafd7dd0872eb Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 10 May 2012 20:15:51 +0200
Subject: [PATCH 224/315] Release notes and changelog for 0.5.7

---
 Changelog | 14 ++++++++++++++
 RELEASE   | 16 ++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/Changelog b/Changelog
index 89903eb1ad..c5c5d31d49 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,20 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.8:
+
+- id3v2: fix skipping extended header in id3v2.4
+- nsvdec: Several bugfixes related to CVE-2011-3940
+- dv: check stype
+- dv: Fix null pointer dereference due to ach=0
+- dv: Fix small stack overread related to CVE-2011-3929 and CVE-2011-3936.
+- atrac3: Fix crash in tonal component decoding, fixes CVE-2012-0853
+- mjpegbdec: Fix overflow in SOS, fixes CVE-2011-3947
+- motionpixels: Clip YUV values after applying a gradient.
+- vqavideo: return error if image size is not a multiple of block size,
+  fixes CVE-2012-0947.
+
+
 version 0.5.7:
 - vorbis: An additional defense in the Vorbis codec. (CVE-2011-3895)
 - vorbisdec: Fix decoding bug with channel handling.
diff --git a/RELEASE b/RELEASE
index e68779b5b2..7c0086c16e 100644
--- a/RELEASE
+++ b/RELEASE
@@ -187,3 +187,19 @@ demuxer (CVE-2011-3893 and CVE-2011-3895).
 Distributors and system integrators are encouraged
 to update and share their patches against this branch.  For a full list
 of changes please see the Changelog file.
+
+* 0.5.8 May 10, 2012
+
+General notes
+-------------
+
+This maintenance-only release that addresses a number a number of
+security issues that have been brought to our attention. Among other
+(rather minor) fixes, this release features fixes for the DV decoder
+(CVE-2011-3929 and CVE-2011-3936), nsvdec (CVE-2011-3940), Atrac3
+(CVE-2012-0853), mjpegdec (CVE-2011-3947) and the VQA video decoder
+(CVE-2012-0947).
+
+Distributors and system integrators are encouraged
+to update and share their patches against this branch.  For a full list
+of changes please see the Changelog file.

From 4b2e02a4c4a618ac6d03fd38eb7ab0bc09596667 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 10 May 2012 20:21:51 +0200
Subject: [PATCH 225/315] Bump version number for 0.5.8 release.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index d3532a107e..659914ae94 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.7
+0.5.8

From f73106256d138fa0d58489cd32cc7bce4566c4e3 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 11 May 2012 22:37:20 +0200
Subject: [PATCH 226/315] fix release number

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 RELEASE | 2 +-
 VERSION | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/RELEASE b/RELEASE
index 1d7aa2b768..eef90c8f8d 100644
--- a/RELEASE
+++ b/RELEASE
@@ -198,7 +198,7 @@ Distributors and system integrators are encouraged
 to update and share their patches against this branch.  For a full list
 of changes please see the Changelog file.
 
-* 0.5.8 May 10, 2012
+* 0.5.9 May 11, 2012
 
 General notes
 -------------
diff --git a/VERSION b/VERSION
index 659914ae94..416bfb0a22 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.8
+0.5.9

From 5e3cd42b6d0760d35fb0dec62ad13f09dee94448 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 16 Apr 2012 14:30:33 +0200
Subject: [PATCH 227/315] 4xmdemux: Check chunk size

Fixes over reading the header array

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 474e31c904f766b6989fe614c3fb093e697c847f)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/4xm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavformat/4xm.c b/libavformat/4xm.c
index 631f8216a5..d0fd312cc8 100644
--- a/libavformat/4xm.c
+++ b/libavformat/4xm.c
@@ -127,6 +127,10 @@ static int fourxm_read_header(AVFormatContext *s,
     for (i = 0; i < header_size - 8; i++) {
         fourcc_tag = AV_RL32(&header[i]);
         size = AV_RL32(&header[i + 4]);
+        if (size > header_size - i - 8 && (fourcc_tag == vtrk_TAG || fourcc_tag == strk_TAG)) {
+            av_log(s, AV_LOG_ERROR, "chunk larger than array %d>%d\n", size, header_size - i - 8);
+            return AVERROR_INVALIDDATA;
+        }
 
         if (fourcc_tag == std__TAG) {
             fourxm->fps = av_int2flt(AV_RL32(&header[i + 12]));

From 96c6b3a11c47555ad269404b62078124b7bb6ebf Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 2 Mar 2012 15:58:14 +0100
Subject: [PATCH 228/315] h261: check mtype.

Fixes out of array read

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit ec3cd74f2dab8e3e8234ccb994132b23d3098585)
---
 libavcodec/h261dec.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c
index 70542869c5..aa1be6f642 100644
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -285,6 +285,10 @@ static int h261_decode_mb(H261Context *h){
 
     // Read mtype
     h->mtype = get_vlc2(&s->gb, h261_mtype_vlc.table, H261_MTYPE_VLC_BITS, 2);
+    if (h->mtype < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "illegal mtype %d\n", h->mtype);
+        return SLICE_ERROR;
+    }
     h->mtype = h261_mtype_map[h->mtype];
 
     // Read mquant

From 123e925956772a990c69dfbc25437ea8daf3b804 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 23 Mar 2012 00:49:00 +0100
Subject: [PATCH 229/315] mmdemux: don't set pkt->size to an invalid value.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 0c97fd336e17535239ab44d755a0d957dc2688f3)
---
 libavformat/mm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavformat/mm.c b/libavformat/mm.c
index 6c621ab493..1868e900bc 100644
--- a/libavformat/mm.c
+++ b/libavformat/mm.c
@@ -162,7 +162,6 @@ static int mm_read_packet(AVFormatContext *s,
         case MM_TYPE_AUDIO :
             if (av_get_packet(s->pb, pkt, length)<0)
                 return AVERROR(ENOMEM);
-            pkt->size = length;
             pkt->stream_index = 1;
             pkt->pts = mm->audio_pts++;
             return 0;

From a55db1fc497dfa30e9f0596f8bb203f7645d17b7 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 17 Mar 2012 20:45:45 +0100
Subject: [PATCH 230/315] dsp: fix diff_bytes_mmx() with small width

Fixes Ticket1068

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 73089eccd3e48539555349b36d8aabbf1cea416e)
---
 libavcodec/x86/dsputilenc_mmx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 1717a01796..c1a648b308 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -882,6 +882,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
 
 static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     x86_reg i=0;
+    if(w>=16)
     __asm__ volatile(
         "1:                             \n\t"
         "movq  (%2, %0), %%mm0          \n\t"

From 7944a87ba8a6b1faf167d5b116dfa55233e0a697 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Fri, 17 Feb 2012 14:13:40 -0800
Subject: [PATCH 231/315] dpcm: ignore extra unpaired bytes in stereo streams.

Fixes: CVE-2011-3951

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
(cherry picked from commit ce7aee9b733134649a6ce2fa743e51733f33e67e)
(cherry picked from commit eaeaeb265fe46e1d81452960de918227541873b4)

Conflicts:

	libavcodec/dpcm.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 1ce9c93198fc997e8f23934a78e2937af670e4e9)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 41f1f146c9e29dde63e293078819474c9b8111a1)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/dpcm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/dpcm.c b/libavcodec/dpcm.c
index daa21cd09e..a364864ba3 100644
--- a/libavcodec/dpcm.c
+++ b/libavcodec/dpcm.c
@@ -167,6 +167,7 @@ static int dpcm_decode_frame(AVCodecContext *avctx,
     int in, out = 0;
     int predictor[2];
     int channel_number = 0;
+    int stereo = s->channels - 1;
     short *output_samples = data;
     int shift[2];
     unsigned char byte;
@@ -175,6 +176,9 @@ static int dpcm_decode_frame(AVCodecContext *avctx,
     if (!buf_size)
         return 0;
 
+    if (stereo && (buf_size & 1))
+        buf_size--;
+
     // almost every DPCM variant expands one byte of data into two
     if(*data_size/2 < buf_size)
         return -1;

From 0bf8e22cdb0bbe7bc9cba9ba1046b5eae509098b Mon Sep 17 00:00:00 2001
From: Alexander Strange <astrange@ithinksw.com>
Date: Sat, 24 Mar 2012 17:32:14 -0400
Subject: [PATCH 232/315] h264: Add check for invalid chroma_format_idc

Fixes a crash when FF_DEBUG_PICT_INFO is used.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
(cherry picked from commit 6ef4063957aa5025c8d2cd757b6a537e4b6874df)

Fixes: CVE-2012-0851

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 47132345184dc3d0ff962a57a1225564fe979548)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit c5f7c755cfccd7aa01010a2d566104c2b0fa6d86)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 00d2c432581cf61326973a1a48f2e63690b65515)
---
 libavcodec/h264.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 3ea61330ec..c8b561d155 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -7131,8 +7131,12 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
 
     if(sps->profile_idc >= 100){ //high profile
         sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
-        if(sps->chroma_format_idc == 3)
+        if(sps->chroma_format_idc > 3) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "chroma_format_idc (%u) out of range\n", sps->chroma_format_idc);
+            return -1;
+        } else if(sps->chroma_format_idc == 3) {
             sps->residual_color_transform_flag = get_bits1(&s->gb);
+        }
         sps->bit_depth_luma   = get_ue_golomb(&s->gb) + 8;
         sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
         sps->transform_bypass = get_bits1(&s->gb);

From 7867cbaf6c2583642c8f3a909a9c6f00ec01b162 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-libav@jannau.net>
Date: Thu, 5 Jan 2012 20:50:55 +0100
Subject: [PATCH 233/315] adpcm: ADPCM Electronic Arts has always two channels

Fixes half of http://ffmpeg.org/trac/ffmpeg/ticket/794
Addresses CVE-2012-0852

(cherry picked from commit bb5b3940b08d8dad5b7e948e8f3b02cd2eb70716)

Conflicts:

	libavcodec/adpcm.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit b581580bd1cc8506befa65b0a5c9ae429240f21f)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit a0f58c3a605b8123039628d1598cb36f1da0e815)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/adpcm.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index 994c0c6865..8eff05ecb0 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -667,17 +667,23 @@ static int adpcm_encode_frame(AVCodecContext *avctx,
 static av_cold int adpcm_decode_init(AVCodecContext * avctx)
 {
     ADPCMContext *c = avctx->priv_data;
+    unsigned int min_channels = 1;
     unsigned int max_channels = 2;
 
     switch(avctx->codec->id) {
+    case CODEC_ID_ADPCM_EA:
+        min_channels = 2;
+        break;
     case CODEC_ID_ADPCM_EA_R1:
     case CODEC_ID_ADPCM_EA_R2:
     case CODEC_ID_ADPCM_EA_R3:
         max_channels = 6;
         break;
     }
-    if(avctx->channels > max_channels){
-        return -1;
+
+    if (avctx->channels < min_channels || avctx->channels > max_channels) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
+        return AVERROR(EINVAL);
     }
 
     switch(avctx->codec->id) {

From 5629c3910188182a23ca0d46abd5a2350f502c58 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Thu, 26 Jan 2012 17:30:49 +0100
Subject: [PATCH 234/315] kmvc: Check palsize.

Fixes: CVE-2011-3952

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Based on fix by Michael Niedermayer
(cherry picked from commit 386741f887714d3e46c9e8fe577e326a7964037b)
(cherry picked from commit 416849f2e06227b1b4a451c392f100db1d709a0c)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit e7392dc349291eb94379d8cfb7ef73d32a768858)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/kmvc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavcodec/kmvc.c b/libavcodec/kmvc.c
index 30939ab411..69b5937668 100644
--- a/libavcodec/kmvc.c
+++ b/libavcodec/kmvc.c
@@ -33,6 +33,7 @@
 #define KMVC_KEYFRAME 0x80
 #define KMVC_PALETTE  0x40
 #define KMVC_METHOD   0x0F
+#define MAX_PALSIZE   256
 
 /*
  * Decoder context
@@ -43,7 +44,7 @@ typedef struct KmvcContext {
 
     int setpal;
     int palsize;
-    uint32_t pal[256];
+    uint32_t pal[MAX_PALSIZE];
     uint8_t *cur, *prev;
     uint8_t *frm0, *frm1;
 } KmvcContext;
@@ -366,6 +367,10 @@ static av_cold int decode_init(AVCodecContext * avctx)
         c->palsize = 127;
     } else {
         c->palsize = AV_RL16(avctx->extradata + 10);
+        if (c->palsize >= MAX_PALSIZE) {
+            av_log(avctx, AV_LOG_ERROR, "KMVC palette too large\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     if (avctx->extradata_size == 1036) {        // palette in extradata

From ae6c57859cc0a5aaed7fcb14da63c58086c46e64 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 2 May 2012 16:12:46 +0000
Subject: [PATCH 235/315] qdm2: clip array indices returned by qdm2_get_vlc().

Prevents subsequent overreads when these numbers are used as indices
in arrays.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org

Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
(cherry picked from commit 64953f67f98da2e787aeb45cc7f504390fa32a69)
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>

Conflicts:

	libavcodec/qdm2.c
---
 libavcodec/qdm2.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 0a4840242f..79aa256ccd 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -905,9 +905,13 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l
                         break;
 
                     case 30:
-                        if (BITS_LEFT(length,gb) >= 4)
-                            samples[0] = type30_dequant[qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1)];
-                        else
+                        if (BITS_LEFT(length,gb) >= 4) {
+                            unsigned index = qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1);
+                            if (index < FF_ARRAY_ELEMS(type30_dequant)) {
+                                samples[0] = type30_dequant[index];
+                            } else
+                                samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        } else
                             samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
 
                         run = 1;
@@ -921,8 +925,12 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l
                                 type34_predictor = samples[0];
                                 type34_first = 0;
                             } else {
-                                samples[0] = type34_delta[qdm2_get_vlc(gb, &vlc_tab_type34, 0, 1)] / type34_div + type34_predictor;
-                                type34_predictor = samples[0];
+                                unsigned index = qdm2_get_vlc(gb, &vlc_tab_type34, 0, 1);
+                                if (index < FF_ARRAY_ELEMS(type34_delta)) {
+                                    samples[0] = type34_delta[index] / type34_div + type34_predictor;
+                                    type34_predictor = samples[0];
+                                } else
+                                    samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
                             }
                         } else {
                             samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);

From b990187f992be89e7e507f0c512c9d2f3efa9068 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sat, 10 Jul 2010 04:08:02 +0000
Subject: [PATCH 236/315] configure: properly check for mingw-w64 through
 installed headers. mingw-w64 can also target 32-bit code.

Originally committed as revision 24156 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 0a4307d6307516d333ce2cde2a2ffa0f50bc176c)
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 745d8fdd67..c2da74aa1b 100755
--- a/configure
+++ b/configure
@@ -1611,7 +1611,7 @@ case $target_os in
         objformat="win32"
         enable dos_paths
         check_cflags -fno-common
-        if ! enabled x86_64; then
+        if ! check_cpp_condition _mingw.h "defined (__MINGW64_VERSION_MAJOR)"; then
             check_cpp_condition _mingw.h "(__MINGW32_MAJOR_VERSION > 3) || (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
                 die "ERROR: MinGW runtime version must be >= 3.15."
             enabled_any avisynth vfwcap_demuxer &&

From f6147effd2e998007b30510f723ab75cd549b226 Mon Sep 17 00:00:00 2001
From: kemuri <kemuri9@gmail.com>
Date: Sat, 23 Jan 2010 20:42:00 +0000
Subject: [PATCH 237/315] Replace every usage of -lvfw32 with what is
 particularly necessary for that case: Avisynth -> -lavifil32 VFW Cap ->
 -lavicap32 Patch by kemuri <kemuri9 at gmail dot com>

Originally committed as revision 21410 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit a1b3c5a377976d21b9daa878265c6eada24c2543)
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>

Conflicts:

	configure
---
 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index c2da74aa1b..61db28c21f 100755
--- a/configure
+++ b/configure
@@ -1132,7 +1132,7 @@ sdp_demuxer_deps="rtp_protocol mpegts_demuxer"
 v4l_demuxer_deps="linux_videodev_h"
 v4l2_demuxer_deps_any="linux_videodev2_h sys_videoio_h"
 vfwcap_demuxer_deps="capCreateCaptureWindow"
-vfwcap_demuxer_extralibs="-lvfw32"
+vfwcap_demuxer_extralibs="-lavicap32"
 x11_grab_device_demuxer_deps="x11grab XShmCreateImage"
 x11_grab_device_demuxer_extralibs="-lX11 -lXext"
 
@@ -2022,7 +2022,7 @@ EOF
 done
 
 # these are off by default, so fail if requested and not available
-enabled avisynth   && require2 vfw32 "windows.h vfw.h" AVIFileInit -lvfw32
+enabled avisynth   && require2 vfw32 "windows.h vfw.h" AVIFileInit -lavifil32
 enabled libamr_nb  && require  libamrnb amrnb/interf_dec.h Speech_Decode_Frame_init -lamrnb -lm
 enabled libamr_wb  && require  libamrwb amrwb/dec_if.h D_IF_init -lamrwb -lm
 enabled libdirac   && add_cflags $(pkg-config --cflags dirac) &&
@@ -2149,7 +2149,7 @@ check_header linux/videodev.h
 check_header linux/videodev2.h
 check_header sys/videoio.h
 
-check_func_headers "windows.h vfw.h" capCreateCaptureWindow -lvfw32
+check_func_headers "windows.h vfw.h" capCreateCaptureWindow -lavicap32
 
 # check for ioctl_meteor.h, ioctl_bt848.h and alternatives
 { check_header dev/bktr/ioctl_meteor.h &&

From baba561c0b0ee449837f727df5f88c038b7d2569 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sun, 11 Jul 2010 22:17:17 +0000
Subject: [PATCH 238/315] mingw32: properly check if vfw capture is supported
 by the system headers

Remove check for a specific w32api version, checking instead if vfw.h
supports vfw capture. The defines in w32api 3.12 were wrong, so this must be
accounted for in the check.

Originally committed as revision 24203 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit ec1ee802a2e1cb3317bd44851cc28f95b5916051)
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>

Conflicts:

	configure
---
 configure | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index 61db28c21f..835535fc24 100755
--- a/configure
+++ b/configure
@@ -1131,7 +1131,7 @@ rtsp_demuxer_deps="sdp_demuxer"
 sdp_demuxer_deps="rtp_protocol mpegts_demuxer"
 v4l_demuxer_deps="linux_videodev_h"
 v4l2_demuxer_deps_any="linux_videodev2_h sys_videoio_h"
-vfwcap_demuxer_deps="capCreateCaptureWindow"
+vfwcap_demuxer_deps="capCreateCaptureWindow vfwcap_defines"
 vfwcap_demuxer_extralibs="-lavicap32"
 x11_grab_device_demuxer_deps="x11grab XShmCreateImage"
 x11_grab_device_demuxer_extralibs="-lX11 -lXext"
@@ -1614,10 +1614,7 @@ case $target_os in
         if ! check_cpp_condition _mingw.h "defined (__MINGW64_VERSION_MAJOR)"; then
             check_cpp_condition _mingw.h "(__MINGW32_MAJOR_VERSION > 3) || (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
                 die "ERROR: MinGW runtime version must be >= 3.15."
-            enabled_any avisynth vfwcap_demuxer &&
-                { check_cpp_condition w32api.h "(__W32API_MAJOR_VERSION > 3) || (__W32API_MAJOR_VERSION == 3 && __W32API_MINOR_VERSION >= 13)" ||
-                  die "ERROR: avisynth and vfwcap_demuxer require w32api version 3.13 or later."; }
-            fi
+        fi
         ;;
     cygwin*)
         target_os=cygwin
@@ -2150,6 +2147,9 @@ check_header linux/videodev2.h
 check_header sys/videoio.h
 
 check_func_headers "windows.h vfw.h" capCreateCaptureWindow -lavicap32
+# check that WM_CAP_DRIVER_CONNECT is defined to the proper value
+# w32api 3.12 had it defined wrong
+check_cpp_condition vfw.h "WM_CAP_DRIVER_CONNECT > WM_USER" && enable vfwcap_defines
 
 # check for ioctl_meteor.h, ioctl_bt848.h and alternatives
 { check_header dev/bktr/ioctl_meteor.h &&

From b061ee9a5d68aa9f619ae8028171018940f07471 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sun, 11 Jul 2010 22:31:41 +0000
Subject: [PATCH 239/315] mingw32: merge checks for mingw-w64 and
 mingw32-runtime >= 3.15 into one

Originally committed as revision 24204 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit e26011d0f495de1148b8014995cbe923611b6b76)
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 configure | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 835535fc24..67cc2f65ca 100755
--- a/configure
+++ b/configure
@@ -1611,10 +1611,9 @@ case $target_os in
         objformat="win32"
         enable dos_paths
         check_cflags -fno-common
-        if ! check_cpp_condition _mingw.h "defined (__MINGW64_VERSION_MAJOR)"; then
-            check_cpp_condition _mingw.h "(__MINGW32_MAJOR_VERSION > 3) || (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
+        check_cpp_condition _mingw.h "defined (__MINGW64_VERSION_MAJOR) || (__MINGW32_MAJOR_VERSION > 3) \
+                                      || (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
                 die "ERROR: MinGW runtime version must be >= 3.15."
-        fi
         ;;
     cygwin*)
         target_os=cygwin

From 79674d27d9f9e5a7a4b6827fb0651e5b5f12167e Mon Sep 17 00:00:00 2001
From: kemuri <kemuri9@gmail.com>
Date: Sat, 23 Jan 2010 20:58:29 +0000
Subject: [PATCH 240/315] vfwcap: Include windows.h before vfw.h since the
 latter requires defines from the former. Patch by kemuri <kemuri9 at gmail
 dot com>

Originally committed as revision 21411 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 420755dd282a913c2163d5589706d6a99a18d10f)
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavdevice/vfwcap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index 4b6f73bc47..e415b740cf 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -20,8 +20,8 @@
  */
 
 #include "libavformat/avformat.h"
-#include <vfw.h>
 #include <windows.h>
+#include <vfw.h>
 
 //#define DEBUG_VFW
 

From 8ba939fcda58a2af781d8050bd7108b9e951eeaf Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 4 May 2012 16:06:26 -0700
Subject: [PATCH 241/315] ea: check chunk_size for validity.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org
(cherry picked from commit 273e6af47b38391f2bcc157cca0423fe7fcbf55c)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 6a86b705e1d4b72f0dddfbe23ad3eed9947001d5)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit e74bc64dd376c4691a610ba62a66ed30affc97ec)

Conflicts:

	libavformat/electronicarts.c
(cherry picked from commit 38c45adfca299e3d96c07a700032695ec7ff2aeb)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/electronicarts.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavformat/electronicarts.c b/libavformat/electronicarts.c
index fe19e70f73..ad63c5619b 100644
--- a/libavformat/electronicarts.c
+++ b/libavformat/electronicarts.c
@@ -448,12 +448,17 @@ static int ea_read_packet(AVFormatContext *s,
 
     while (!packet_read) {
         chunk_type = get_le32(pb);
-        chunk_size = (ea->big_endian ? get_be32(pb) : get_le32(pb)) - 8;
+        chunk_size = ea->big_endian ? get_be32(pb) : get_le32(pb);
+        if (chunk_size <= 8)
+            return AVERROR_INVALIDDATA;
+        chunk_size -= 8;
 
         switch (chunk_type) {
         /* audio data */
         case ISNh_TAG:
             /* header chunk also contains data; skip over the header portion*/
+            if (chunk_size < 32)
+                return AVERROR_INVALIDDATA;
             url_fskip(pb, 32);
             chunk_size -= 32;
         case ISNd_TAG:

From f8a31e2113abc9dc830e93afa8a3d279fe464e7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 24 May 2009 09:14:19 +0000
Subject: [PATCH 242/315] eatqi: move "block" variable into context to ensure
 sufficient alignment for idct_put for compilers/architectures that can not
 align stack variables that much. This is also consistent with similar code in
 eatgq.c

Originally committed as revision 18927 to svn://svn.ffmpeg.org/ffmpeg/trunk
(cherry picked from commit 1eda87ce6366189eebf9956f826dfd92d9e64d9c)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/eatqi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 66123a2aae..4e43f95e50 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -40,6 +40,7 @@ typedef struct TqiContext {
     AVFrame frame;
     uint8_t *bitstream_buf;
     unsigned int bitstream_buf_size;
+    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
 } TqiContext;
 
 static av_cold int tqi_decode_init(AVCodecContext *avctx)
@@ -106,7 +107,6 @@ static int tqi_decode_frame(AVCodecContext *avctx,
     const uint8_t *buf_end = buf+buf_size;
     TqiContext *t = avctx->priv_data;
     MpegEncContext *s = &t->s;
-    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
 
     s->width  = AV_RL16(&buf[0]);
     s->height = AV_RL16(&buf[2]);
@@ -134,8 +134,8 @@ static int tqi_decode_frame(AVCodecContext *avctx,
     for (s->mb_y=0; s->mb_y<(avctx->height+15)/16; s->mb_y++)
     for (s->mb_x=0; s->mb_x<(avctx->width+15)/16; s->mb_x++)
     {
-        tqi_decode_mb(s, block);
-        tqi_idct_put(t, block);
+        tqi_decode_mb(s, t->block);
+        tqi_idct_put(t, t->block);
     }
 
     *data_size = sizeof(AVFrame);

From 02cd93f4ad432769bdda96c36e920d3664bc3e75 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 19 Dec 2011 04:13:37 +0100
Subject: [PATCH 243/315] tqi: Pass errors from the MB decoder

This silences some valgrind warnings.
CC: libav-stable@libav.org

Fixes second half of http://ffmpeg.org/trac/ffmpeg/ticket/794
Bug found by: Oana Stratulat

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit f85334f58e1286287d0547a49fa9c93b40cbf48f)
(cherry picked from commit 90290a5150e84fb138ccde57657dc03830f08c1c)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 5872580e65aab026b77754eb184f97ba7cc6ea35)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 2f2fd8c6d1c51a6b817e6c0bc4eff308b8f9cd18)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit c3edce42704142f4c66954e9f24d7fbf0e5ae423)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/eatqi.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 4e43f95e50..e3c06c0055 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -59,12 +59,15 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static void tqi_decode_mb(MpegEncContext *s, DCTELEM (*block)[64])
+static int tqi_decode_mb(MpegEncContext *s, DCTELEM (*block)[64])
 {
     int n;
     s->dsp.clear_blocks(block[0]);
     for (n=0; n<6; n++)
-        ff_mpeg1_decode_block_intra(s, block[n], n);
+        if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0)
+            return -1;
+
+    return 0;
 }
 
 static inline void tqi_idct_put(TqiContext *t, DCTELEM (*block)[64])
@@ -134,7 +137,8 @@ static int tqi_decode_frame(AVCodecContext *avctx,
     for (s->mb_y=0; s->mb_y<(avctx->height+15)/16; s->mb_y++)
     for (s->mb_x=0; s->mb_x<(avctx->width+15)/16; s->mb_x++)
     {
-        tqi_decode_mb(s, t->block);
+        if (tqi_decode_mb(s, t->block) < 0)
+            break;
         tqi_idct_put(t, t->block);
     }
 

From 5a9588b088423772ba567a71a3b0d4800c77d407 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 3 Jun 2012 19:35:50 +0200
Subject: [PATCH 244/315] png: check bit depth for PAL8/Y400A pixel formats.

Wrong bit depth can lead to invalid rowsize values, which crashes the
decoder further down.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org
(cherry picked from commit d2205d6543881f2e6fa18c8a354bbcf91a1235f7)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit b8d6ba9d50e80fdce2ed74cdaffd4960df8a21c5)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 33f93005f1a86c108302b4c5978aa1a3d8e092cc)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 4c8c2660bd9252775c9a1dc2e2f36cb34718595a)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>

Conflicts:

	libavcodec/pngdec.c
---
 libavcodec/pngdec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index a3431525b6..d583a523b0 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -473,7 +473,8 @@ static int decode_frame(AVCodecContext *avctx,
                 } else if (s->bit_depth == 1 &&
                            s->color_type == PNG_COLOR_TYPE_GRAY) {
                     avctx->pix_fmt = PIX_FMT_MONOBLACK;
-                } else if (s->color_type == PNG_COLOR_TYPE_PALETTE) {
+                } else if (s->bit_depth == 8 &&
+                           s->color_type == PNG_COLOR_TYPE_PALETTE) {
                     avctx->pix_fmt = PIX_FMT_PAL8;
                 } else {
                     goto fail;

From 4c223fe519174f0d7086f4698e9f7b9840cf15e9 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 3 Jun 2012 22:42:30 +0200
Subject: [PATCH 245/315] Bump version number for 0.5.9 release.

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 659914ae94..416bfb0a22 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.8
+0.5.9

From d843e7dc941efab94c6756bf65d8421d80f9ce9a Mon Sep 17 00:00:00 2001
From: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Date: Fri, 8 Jun 2012 15:41:31 -0400
Subject: [PATCH 246/315] Update changelog for 0.5.9 release

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 Changelog | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Changelog b/Changelog
index c5c5d31d49..b39ecc0817 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,23 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 
+version 0.5.9:
+- dpcm: ignore extra unpaired bytes in stereo streams (CVE-2011-3951)
+- h264: Add check for invalid chroma_format_idc (CVE-2012-0851)
+- adpcm: ADPCM Electronic Arts has always two channels (CVE-2012-0852)
+- kmvc: Check palsize (CVE-2011-3952)
+- qdm2: clip array indices returned by qdm2_get_vlc()
+- configure: properly check for mingw-w64 through installed headers
+- Replace every usage of -lvfw32 with what is particularly necessary for that case
+- mingw32: properly check if vfw capture is supported by the system headers
+- mingw32: merge checks for mingw-w64 and mingw32-runtime >= 3.15 into one
+- vfwcap: Include windows.h before vfw.h since the latter requires defines from the former
+- ea: check chunk_size for validity
+- eatqi: move "block" variable into context to ensure sufficient alignment for idct_put
+- tqi: Pass errors from the MB decoder
+- png: check bit depth for PAL8/Y400A pixel formats.
+
+
 version 0.5.8:
 
 - id3v2: fix skipping extended header in id3v2.4

From 6ec1d3b3bafa59363763631a8e090b2e12f6ef33 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sat, 9 Jun 2012 12:12:52 +0200
Subject: [PATCH 247/315] Release notes for 0.5.9

---
 RELEASE | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/RELEASE b/RELEASE
index 7c0086c16e..c164d6b566 100644
--- a/RELEASE
+++ b/RELEASE
@@ -188,6 +188,8 @@ Distributors and system integrators are encouraged
 to update and share their patches against this branch.  For a full list
 of changes please see the Changelog file.
 
+
+
 * 0.5.8 May 10, 2012
 
 General notes
@@ -203,3 +205,20 @@ security issues that have been brought to our attention. Among other
 Distributors and system integrators are encouraged
 to update and share their patches against this branch.  For a full list
 of changes please see the Changelog file.
+
+
+
+* 0.5.9 Jun 09, 2012
+
+General notes
+-------------
+
+This mostly maintenance-only release addresses a number a number of bugs
+such as security and compilation issues that have been brought to our
+attention. Among other fixes, this release features includes security
+updates for the DPCM codecs (CVE-2011-3951), H.264 (CVE-2012-0851),
+ADPCM (CVE-2012-0852), and the KMVC decoder (CVE-2011-3952).
+
+Distributors and system integrators are encouraged
+to update and share their patches against this branch.  For a full list
+of changes please see the Changelog file or the git commit history.

From cc511b36f35400bc13f3987da374462f7c20c6c5 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 30 May 2012 16:19:36 +0200
Subject: [PATCH 248/315] truemotion1: Check index, fix out of array read

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit fd4c1c0b70b5a06dd572d7e27799a2f4c3d9b984)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/truemotion1.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/libavcodec/truemotion1.c b/libavcodec/truemotion1.c
index 1cf56ed3dd..ada1270bac 100644
--- a/libavcodec/truemotion1.c
+++ b/libavcodec/truemotion1.c
@@ -519,6 +519,10 @@ hres,vres,i,i%vres (0 < i < 4)
 }
 
 #define APPLY_C_PREDICTOR() \
+    if(index > 1023){\
+        av_log(s->avctx, AV_LOG_ERROR, " index %d went out of bounds\n", index); \
+        return; \
+    }\
     predictor_pair = s->c_predictor_table[index]; \
     horiz_pred += (predictor_pair >> 1); \
     if (predictor_pair & 1) { \
@@ -536,6 +540,10 @@ hres,vres,i,i%vres (0 < i < 4)
         index++;
 
 #define APPLY_C_PREDICTOR_24() \
+    if(index > 1023){\
+        av_log(s->avctx, AV_LOG_ERROR, " index %d went out of bounds\n", index); \
+        return; \
+    }\
     predictor_pair = s->c_predictor_table[index]; \
     horiz_pred += (predictor_pair >> 1); \
     if (predictor_pair & 1) { \
@@ -554,6 +562,10 @@ hres,vres,i,i%vres (0 < i < 4)
 
 
 #define APPLY_Y_PREDICTOR() \
+    if(index > 1023){\
+        av_log(s->avctx, AV_LOG_ERROR, " index %d went out of bounds\n", index); \
+        return; \
+    }\
     predictor_pair = s->y_predictor_table[index]; \
     horiz_pred += (predictor_pair >> 1); \
     if (predictor_pair & 1) { \
@@ -571,6 +583,10 @@ hres,vres,i,i%vres (0 < i < 4)
         index++;
 
 #define APPLY_Y_PREDICTOR_24() \
+    if(index > 1023){\
+        av_log(s->avctx, AV_LOG_ERROR, " index %d went out of bounds\n", index); \
+        return; \
+    }\
     predictor_pair = s->y_predictor_table[index]; \
     horiz_pred += (predictor_pair >> 1); \
     if (predictor_pair & 1) { \

From 48ef11663164b5e7221089a813e733dc7a0776b3 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 1 Jun 2012 21:42:29 +0200
Subject: [PATCH 249/315] wnv1: check that the input buffer is large enough

Fixes null ptr deref
Fixes Ticket1367

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f23a2418fb0ccc56fdae4dbf83a5994cc917c475)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/wnv1.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/wnv1.c b/libavcodec/wnv1.c
index 7c0105f1ef..aa284da22f 100644
--- a/libavcodec/wnv1.c
+++ b/libavcodec/wnv1.c
@@ -67,6 +67,11 @@ static int decode_frame(AVCodecContext *avctx,
     int prev_y = 0, prev_u = 0, prev_v = 0;
     uint8_t *rbuf;
 
+    if(buf_size<=8) {
+        av_log(avctx, AV_LOG_ERROR, "buf_size %d is too small\n", buf_size);
+        return AVERROR_INVALIDDATA;
+    }
+
     rbuf = av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
     if(!rbuf){
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");

From 9eaec5b8f010c805fd8e77216a1ec67eb20b1466 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 9 Jun 2012 22:18:07 +0200
Subject: [PATCH 250/315] update for 0.5.10

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 VERSION    | 2 +-
 cmdutils.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/VERSION b/VERSION
index 416bfb0a22..50c76ef872 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.9
+0.5.10
diff --git a/cmdutils.c b/cmdutils.c
index 108eaccdd1..f1e5352362 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -50,7 +50,7 @@ AVCodecContext *avctx_opts[CODEC_TYPE_NB];
 AVFormatContext *avformat_opts;
 struct SwsContext *sws_opts;
 
-const int this_year = 2009;
+const int this_year = 2012;
 
 double parse_number_or_die(const char *context, const char *numstr, int type, double min, double max)
 {

From 09a278fdd13cfc16c151190051f362625595c06e Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Thu, 13 Sep 2012 09:26:55 +0200
Subject: [PATCH 251/315] Fix muxing mjpeg in swf. (cherry picked from commit
 79234907128390d775ceb1206dd0fed111a17896)

---
 libavformat/swfenc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/swfenc.c b/libavformat/swfenc.c
index b433f6b713..c1bb2b7023 100644
--- a/libavformat/swfenc.c
+++ b/libavformat/swfenc.c
@@ -496,8 +496,10 @@ static int swf_write_trailer(AVFormatContext *s)
         put_le32(pb, file_size);
         url_fseek(pb, swf->duration_pos, SEEK_SET);
         put_le16(pb, swf->video_frame_number);
+        if (swf->vframes_pos) {
         url_fseek(pb, swf->vframes_pos, SEEK_SET);
         put_le16(pb, swf->video_frame_number);
+        }
         url_fseek(pb, file_size, SEEK_SET);
     }
     return 0;

From 9125aa9218c3ff2ae89ef2094c9138438dbeec70 Mon Sep 17 00:00:00 2001
From: Kostya Shishkov <kostya.shishkov@gmail.com>
Date: Thu, 27 Sep 2012 19:25:06 +0200
Subject: [PATCH 252/315] vc1dec: add flush function for WMV9 and VC-1 decoders

CC: libav-stable@libav.org
(cherry picked from commit 4dc8c8386eef942dba35c4f2fb3210e22b511a5b)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 02b72394627933dc8ce26445231a69f00dba491b)

Conflicts:
	libavcodec/vc1dec.c

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 0173a7966b331105158a88f96b9afcc431d2fef8)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit aa4121276777b20eaaa83bf9bd544b00748c865c)

Conflicts:
	libavcodec/vc1dec.c

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/vc1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 619e9030cb..3b8cd22933 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -4347,6 +4347,7 @@ AVCodec vc1_decoder = {
     vc1_decode_frame,
     CODEC_CAP_DELAY,
     NULL,
+    .flush          = ff_mpeg_flush,
     .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
     .pix_fmts = ff_pixfmt_list_420
 };
@@ -4362,6 +4363,7 @@ AVCodec wmv3_decoder = {
     vc1_decode_frame,
     CODEC_CAP_DELAY,
     NULL,
+    .flush          = ff_mpeg_flush,
     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
     .pix_fmts = ff_pixfmt_list_420
 };

From f695be22d89aab1e0549e6ef04b1431c7db96316 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-libav@jannau.net>
Date: Mon, 6 Aug 2012 13:59:04 +0200
Subject: [PATCH 253/315] nuv: check RTjpeg header for validity

CC: libav-stable@libav.org
(cherry picked from commit 859a579e9bbf47fae2e09494c43bcf813dcb2fad)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 6704522ca9dd32c858ee474492be568c386910f9)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit f31170d4e7f9671e019315391160d454b18d7296)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 459feb7cce03af7154c098171fc9d36fc9d472f6)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/nuv.c    | 9 +++++----
 libavcodec/rtjpeg.h | 3 +++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index 109ef41a8c..64ba3ccc10 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -182,17 +182,18 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     }
     if (c->codec_frameheader) {
         int w, h, q;
-        if (buf_size < 12) {
+        if (buf_size < RTJPEG_HEADER_SIZE || buf[4] != RTJPEG_HEADER_SIZE ||
+            buf[5] != RTJPEG_FILE_VERSION) {
             av_log(avctx, AV_LOG_ERROR, "invalid nuv video frame\n");
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         w = AV_RL16(&buf[6]);
         h = AV_RL16(&buf[8]);
         q = buf[10];
         if (!codec_reinit(avctx, w, h, q))
             return -1;
-        buf = &buf[12];
-        buf_size -= 12;
+        buf = &buf[RTJPEG_HEADER_SIZE];
+        buf_size -= RTJPEG_HEADER_SIZE;
     }
 
     if (keyframe && c->pic.data[0])
diff --git a/libavcodec/rtjpeg.h b/libavcodec/rtjpeg.h
index 02f2058b2c..c12a78c67b 100644
--- a/libavcodec/rtjpeg.h
+++ b/libavcodec/rtjpeg.h
@@ -25,6 +25,9 @@
 #include <stdint.h>
 #include "dsputil.h"
 
+#define RTJPEG_FILE_VERSION 0
+#define RTJPEG_HEADER_SIZE 12
+
 typedef struct {
     int w, h;
     DSPContext *dsp;

From 7296a6b5e9c22be515f7bd60a3c6c293691de3f8 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-libav@jannau.net>
Date: Mon, 2 Jul 2012 10:46:39 +0200
Subject: [PATCH 254/315] imgconvert: avoid undefined left shift in
 avcodec_find_best_pix_fmt

CC: libav-stable@libav.org
(cherry picked from commit 39bb27bf79bc4c2d8beaed637a14176264cb1916)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 7a7229b52d1900279041991fadbd29b27e8dfe95)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 8812b5f164109553f009ce385e17a1af16b6ea53)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit fd7426ed898533bed98e6b472ff5f5c8e47f2eb5)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/imgconvert.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 1e0c66de84..eecd12535e 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -890,7 +890,8 @@ static int avcodec_find_best_pix_fmt1(int64_t pix_fmt_mask,
     /* find exact color match with smallest size */
     dst_pix_fmt = -1;
     min_dist = 0x7fffffff;
-    for(i = 0;i < PIX_FMT_NB; i++) {
+    /* test only the first 64 pixel formats to avoid undefined behaviour */
+    for (i = 0; i < 64; i++) {
         if (pix_fmt_mask & (1ULL << i)) {
             loss = avcodec_get_pix_fmt_loss(i, src_pix_fmt, has_alpha) & loss_mask;
             if (loss == 0) {

From 6d6373dc6441b26271d37ec372507e29c0123a88 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 20 Nov 2011 17:19:25 +0100
Subject: [PATCH 255/315] mpegvideo: Don't use ff_mspel_motion() for vc1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using ff_mspel_motion assumes that s (a MpegEncContext
pointer) really is a Wmv2Context.

This fixes crashes in error resilience on vc1/wmv3 videos.

CC: libav-stable@libav.org
Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit 18f2d5cb9c48d06895960f37467576725c9dc2d1)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit da0c457663479bc1828918e1bb3e4a5e4de0d557)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 899d95efe12f1e250b361837c1c8c06df9ac9b86)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit c82ae85a8a78a98f7c7fea68d24a4ac0ca74d01f)

Conflicts:
	libavcodec/mpegvideo_common.h

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/mpegvideo_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mpegvideo_common.h b/libavcodec/mpegvideo_common.h
index cf66dc7fbb..6c39ac7325 100644
--- a/libavcodec/mpegvideo_common.h
+++ b/libavcodec/mpegvideo_common.h
@@ -727,7 +727,7 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
                         0, 0, 0,
                         ref_picture, pix_op, qpix_op,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
-        }else if(!is_mpeg12 && CONFIG_WMV2 && s->mspel){
+        }else if(!is_mpeg12 && CONFIG_WMV2 && s->mspel && s->codec_id == CODEC_ID_WMV2){
             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                         ref_picture, pix_op,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);

From 0dfcbe5285f04964f5de5e15a4bfbf83fb9fd082 Mon Sep 17 00:00:00 2001
From: Mina Nagy Zaki <mnzaki@gmail.com>
Date: Wed, 8 Jun 2011 19:24:25 +0300
Subject: [PATCH 256/315] lavfi: avfilter_merge_formats: handle case where
 inputs are same

This fixes a double-free crash if lists are the same due to the two
merge_ref() calls at the end of the (useless) merging that happens.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 11b6a82412bcd372adf694a26d83b07d337e1325)

Conflicts:

	libavfilter/formats.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit e5f4e249422834f727bcd432b73af971277f1371)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit b6c5848a1f8fc2755ea70d325acaddae9fac45ab)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit a4e277312cacfb78ef7583ed0b4fe4ccf5a0bcb1)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavfilter/formats.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavfilter/formats.c b/libavfilter/formats.c
index 33fec163a5..c91f8b2cf8 100644
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c
@@ -43,6 +43,9 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b)
     AVFilterFormats *ret;
     unsigned i, j, k = 0;
 
+    if (a == b)
+        return a;
+
     ret = av_mallocz(sizeof(AVFilterFormats));
 
     /* merge list of formats */

From 272e7f6443b76fb47192930d157bfd9284294188 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 29 Jan 2013 18:29:41 +0100
Subject: [PATCH 257/315] huffyuvdec: Check init_vlc() return codes.

Prevents out of array writes

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f67a0d115254461649470452058fa3c28c0df294)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 95ab8d33e1a680f30a5a9605175112008ab81afc)

Conflicts:

	libavcodec/huffyuv.c
(cherry picked from commit 277def59fce10d91e3113e5c0f63e22bc4abfa88)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/huffyuv.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index dfa06d53a6..93e9e745c3 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -318,6 +318,7 @@ static void generate_joint_tables(HYuvContext *s){
                     int len1 = s->len[p][u];
                     if(len1 > limit)
                         continue;
+                    assert(i < (1 << VLC_BITS));
                     len[i] = len0 + len1;
                     bits[i] = (s->bits[0][y] << len1) + s->bits[p][u];
                     symbols[i] = (y<<8) + u;
@@ -351,6 +352,7 @@ static void generate_joint_tables(HYuvContext *s){
                     int len2 = s->len[2][r&255];
                     if(len2 > limit1)
                         continue;
+                    assert(i < (1 << VLC_BITS));
                     len[i] = len0 + len1 + len2;
                     bits[i] = (code << len2) + s->bits[2][r&255];
                     if(s->decorrelate){
@@ -374,6 +376,7 @@ static void generate_joint_tables(HYuvContext *s){
 static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
     GetBitContext gb;
     int i;
+    int ret;
 
     init_get_bits(&gb, src, length*8);
 
@@ -389,7 +392,8 @@ printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
 }
 #endif
         free_vlc(&s->vlc[i]);
-        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
+        if ((ret = init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0)) < 0)
+            return ret;
     }
 
     generate_joint_tables(s);
@@ -401,6 +405,7 @@ static int read_old_huffman_tables(HYuvContext *s){
 #if 1
     GetBitContext gb;
     int i;
+    int ret;
 
     init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8);
     read_len_table(s->len[0], &gb);
@@ -419,7 +424,8 @@ static int read_old_huffman_tables(HYuvContext *s){
 
     for(i=0; i<3; i++){
         free_vlc(&s->vlc[i]);
-        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
+        if ((ret = init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0)) < 0)
+            return ret;
     }
 
     generate_joint_tables(s);

From ac476bfa9f90587eadef5b98cfc40ec77dde3f18 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 29 Jan 2013 19:22:33 +0100
Subject: [PATCH 258/315] huffyuvdec: Skip len==0 cases

Fixes vlc decoding for hypothetical files that would contain such cases.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 0dfc01c2bbf4b71bb56201bc4a393321e15d1b31)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 5ff41ffeb4cb9ea6df49757dc859619dc3d3ab4f)

Conflicts:

	libavcodec/huffyuv.c
(cherry picked from commit 9bc70fe1ae50fd2faa0b9429d47cfbda01a92ebc)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/huffyuv.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index 93e9e745c3..aad9168912 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -312,11 +312,11 @@ static void generate_joint_tables(HYuvContext *s){
             for(i=y=0; y<256; y++){
                 int len0 = s->len[0][y];
                 int limit = VLC_BITS - len0;
-                if(limit <= 0)
+                if(limit <= 0 || !len0)
                     continue;
                 for(u=0; u<256; u++){
                     int len1 = s->len[p][u];
-                    if(len1 > limit)
+                    if (len1 > limit || !len1)
                         continue;
                     assert(i < (1 << VLC_BITS));
                     len[i] = len0 + len1;
@@ -340,17 +340,17 @@ static void generate_joint_tables(HYuvContext *s){
         for(i=0, g=-16; g<16; g++){
             int len0 = s->len[p0][g&255];
             int limit0 = VLC_BITS - len0;
-            if(limit0 < 2)
+            if (limit0 < 2 || !len0)
                 continue;
             for(b=-16; b<16; b++){
                 int len1 = s->len[p1][b&255];
                 int limit1 = limit0 - len1;
-                if(limit1 < 1)
+                if (limit1 < 1 || !len1)
                     continue;
                 code = (s->bits[p0][g&255] << len1) + s->bits[p1][b&255];
                 for(r=-16; r<16; r++){
                     int len2 = s->len[2][r&255];
-                    if(len2 > limit1)
+                    if (len2 > limit1 || !len2)
                         continue;
                     assert(i < (1 << VLC_BITS));
                     len[i] = len0 + len1 + len2;

From 5c9d2d837783b636fb0f923709a2554bbc1a49dd Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 24 Jan 2013 14:26:56 +0100
Subject: [PATCH 259/315] Bump version number for the 0.5.10 release

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 416bfb0a22..50c76ef872 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.9
+0.5.10

From 0f6d4da8def2c697b2512a12285a227ec7d5bb9b Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 16 Sep 2012 08:33:09 +0200
Subject: [PATCH 260/315] bmpdec: only initialize palette for pal8.

Gray8 is not considered to be paletted, so this would cause an invalid
write.

Fixes bug 367.

CC: libav-stable@libav.org
(cherry picked from commit 8b78c2969a5b7dca939d93bf525aa2bcd737b5d9)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavcodec/bmp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c
index 14c070da8f..30bae8c7d0 100644
--- a/libavcodec/bmp.c
+++ b/libavcodec/bmp.c
@@ -217,9 +217,6 @@ static int bmp_decode_frame(AVCodecContext *avctx,
     if(comp == BMP_RLE4 || comp == BMP_RLE8)
         memset(p->data[0], 0, avctx->height * p->linesize[0]);
 
-    if(depth == 4 || depth == 8)
-        memset(p->data[1], 0, 1024);
-
     if(height > 0){
         ptr = p->data[0] + (avctx->height - 1) * p->linesize[0];
         linesize = -p->linesize[0];
@@ -229,6 +226,9 @@ static int bmp_decode_frame(AVCodecContext *avctx,
     }
 
     if(avctx->pix_fmt == PIX_FMT_PAL8){
+
+        memset(p->data[1], 0, 1024);
+
         buf = buf0 + 14 + ihsize; //palette location
         if((hsize-ihsize-14)>>depth < 4){ // OS/2 bitmap, 3 bytes per palette entry
             for(i = 0; i < (1 << depth); i++)

From 4475a7d88b89a6ff064f8917ee71657291d1a37a Mon Sep 17 00:00:00 2001
From: Mina Nagy Zaki <mnzaki@gmail.com>
Date: Wed, 8 Jun 2011 19:24:25 +0300
Subject: [PATCH 261/315] lavfi: avfilter_merge_formats: handle case where
 inputs are same

This fixes a double-free crash when both lists are the same, caused by
the two merge_ref() calls at the end of the (useless) merging that happens.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 11b6a82412bcd372adf694a26d83b07d337e1325)

Conflicts:

	libavfilter/formats.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavfilter/formats.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavfilter/formats.c b/libavfilter/formats.c
index c91f8b2cf8..39b3453e41 100644
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c
@@ -43,6 +43,9 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b)
     AVFilterFormats *ret;
     unsigned i, j, k = 0;
 
+    if (a == b)
+        return a;
+
     if (a == b)
         return a;
 

From 6b97e76dfca87ae868fbd2dff689e9de2ee45bcc Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 20 Apr 2012 17:42:18 +0200
Subject: [PATCH 262/315] avsdec: Set dimensions instead of relying on the
 demuxer.

The decode function assumes that the video will have those dimensions.

Fixes CVE-2012-2801

CC:libav-stable@libav.org

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 85f477935cd6b34e6ec2716b20e15ce748277a89)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/avs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/avs.c b/libavcodec/avs.c
index 3b29c853b4..e8a55fdddd 100644
--- a/libavcodec/avs.c
+++ b/libavcodec/avs.c
@@ -145,6 +145,7 @@ avs_decode_frame(AVCodecContext * avctx,
 static av_cold int avs_decode_init(AVCodecContext * avctx)
 {
     avctx->pix_fmt = PIX_FMT_PAL8;
+    avcodec_set_dimensions(avctx, 318, 198);
     return 0;
 }
 

From c28c631d29a8a6388a58fb3aafb940c607915ff2 Mon Sep 17 00:00:00 2001
From: Aneesh Dogra <lionaneesh@gmail.com>
Date: Tue, 20 Dec 2011 03:54:50 +0530
Subject: [PATCH 263/315] bytestream: add a new set of bytestream functions
 with overread checking

Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
---
 libavcodec/bytestream.h | 44 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/libavcodec/bytestream.h b/libavcodec/bytestream.h
index b56f6ce743..7ca36f8ad3 100644
--- a/libavcodec/bytestream.h
+++ b/libavcodec/bytestream.h
@@ -26,6 +26,10 @@
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 
+typedef struct {
+    const uint8_t *buffer, *buffer_end;
+} GetByteContext;
+
 #define DEF_T(type, name, bytes, read, write)                             \
 static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\
     (*b) += bytes;\
@@ -34,6 +38,18 @@ static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\
 static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type value){\
     write(*b, value);\
     (*b) += bytes;\
+}\
+static av_always_inline type bytestream2_get_ ## name(GetByteContext *g)\
+{\
+    if (g->buffer_end - g->buffer < bytes)\
+        return 0;\
+    return bytestream_get_ ## name(&g->buffer);\
+}\
+static av_always_inline type bytestream2_peek_ ## name(GetByteContext *g)\
+{\
+    if (g->buffer_end - g->buffer < bytes)\
+        return 0;\
+    return read(g->buffer);\
 }
 
 #define DEF(name, bytes, read, write) \
@@ -55,6 +71,34 @@ DEF  (byte, 1, AV_RB8 , AV_WB8 )
 #undef DEF64
 #undef DEF_T
 
+static av_always_inline void bytestream2_init(GetByteContext *g,
+                                              const uint8_t *buf, int buf_size)
+{
+    g->buffer =  buf;
+    g->buffer_end = buf + buf_size;
+}
+
+static av_always_inline unsigned int bytestream2_get_bytes_left(GetByteContext *g)
+{
+    return g->buffer_end - g->buffer;
+}
+
+static av_always_inline void bytestream2_skip(GetByteContext *g,
+                                              unsigned int size)
+{
+    g->buffer += FFMIN(g->buffer_end - g->buffer, size);
+}
+
+static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g,
+                                                            uint8_t *dst,
+                                                            unsigned int size)
+{
+    int size2 = FFMIN(g->buffer_end - g->buffer, size);
+    memcpy(dst, g->buffer, size2);
+    g->buffer += size2;
+    return size2;
+}
+
 static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size)
 {
     memcpy(dst, *b, size);

From d1729c3715af6901788058be46e8a73372e434bf Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 28 Sep 2012 15:42:29 +0200
Subject: [PATCH 264/315] avidec: use actually read size instead of requested
 size

Fixes CVE-2012-2788
(cherry picked from commit 0af49a63c7f87876486ab09482d5b26b95abce60)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/avidec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 78e5051e1e..74edebf41c 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -780,7 +780,7 @@ resync:
             else
                 ast->frame_offset++;
         }
-        ast->remaining -= size;
+        ast->remaining -= err;
         if(!ast->remaining){
             avi->stream_index= -1;
             ast->packet_size= 0;

From 4fac60d568634a2604189e9dce139c100ef31925 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 24 Mar 2012 02:40:24 +0100
Subject: [PATCH 265/315] cavsdec: check for changing w/h.

Our decoder does not support changing w/h.

Fixes CVE-2012-2777 and CVE-2012-2784.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit c20a69630619d14ae92c5541d52c579d7c8f3e94)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/cavsdec.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index 8d30040d74..abfad66259 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -599,12 +599,21 @@ static int decode_pic(AVSContext *h) {
 static int decode_seq_header(AVSContext *h) {
     MpegEncContext *s = &h->s;
     int frame_rate_code;
+    int width, height;
 
     h->profile =         get_bits(&s->gb,8);
     h->level =           get_bits(&s->gb,8);
     skip_bits1(&s->gb); //progressive sequence
-    s->width =           get_bits(&s->gb,14);
-    s->height =          get_bits(&s->gb,14);
+
+    width  = get_bits(&s->gb, 14);
+    height = get_bits(&s->gb, 14);
+    if ((s->width || s->height) && (s->width != width || s->height != height)) {
+        av_log(s, AV_LOG_ERROR, "Width/height changing in CAVS is unsupported");
+        return AVERROR_PATCHWELCOME;
+    }
+    s->width  = width;
+    s->height = height;
+
     skip_bits(&s->gb,2); //chroma format
     skip_bits(&s->gb,3); //sample_precision
     h->aspect_ratio =    get_bits(&s->gb,4);

From 2ae6bdbb9b173932493c74efecd2048fe592e170 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 28 Sep 2012 15:26:48 +0200
Subject: [PATCH 266/315] avidec: return 0, not packet size from read_packet().

(cherry picked from commit eeade678f0a2bac127aeed2fb68d8717a6463420)

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/avidec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 74edebf41c..46dffa11b8 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -786,7 +786,7 @@ resync:
             ast->packet_size= 0;
         }
 
-        return size;
+        return 0;
     }
 
     memset(d, -1, sizeof(int)*8);

From d4e4234147c93289b669fdebb2b0bf9eaaf45625 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 5 Oct 2012 15:53:32 +0200
Subject: [PATCH 267/315] yuv4mpeg: return proper error codes.

Fixes Bug 373.

CC:libav-stable@libav.org
(cherry picked from commit d3a72becc6371563185a509b94f5daf32ddbb485)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/yuv4mpeg.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/libavformat/yuv4mpeg.c b/libavformat/yuv4mpeg.c
index 3fd7927884..1f3892fc60 100644
--- a/libavformat/yuv4mpeg.c
+++ b/libavformat/yuv4mpeg.c
@@ -340,7 +340,7 @@ static int yuv4_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int i;
     char header[MAX_FRAME_HEADER+1];
-    int packet_size, width, height;
+    int packet_size, width, height, ret;
     AVStream *st = s->streams[0];
     struct frame_attributes *s1 = s->priv_data;
 
@@ -351,18 +351,28 @@ static int yuv4_read_packet(AVFormatContext *s, AVPacket *pkt)
             break;
         }
     }
-    if (i == MAX_FRAME_HEADER) return -1;
-    if (strncmp(header, Y4M_FRAME_MAGIC, strlen(Y4M_FRAME_MAGIC))) return -1;
+    if (s->pb->error)
+        return s->pb->error;
+    else if (s->pb->eof_reached)
+        return AVERROR_EOF;
+    else if (i == MAX_FRAME_HEADER)
+        return AVERROR_INVALIDDATA;
+
+    if (strncmp(header, Y4M_FRAME_MAGIC, strlen(Y4M_FRAME_MAGIC)))
+        return AVERROR_INVALIDDATA;
 
     width = st->codec->width;
     height = st->codec->height;
 
     packet_size = avpicture_get_size(st->codec->pix_fmt, width, height);
     if (packet_size < 0)
-        return -1;
+        return packet_size;
 
-    if (av_get_packet(s->pb, pkt, packet_size) != packet_size)
-        return AVERROR(EIO);
+    ret = av_get_packet(s->pb, pkt, packet_size);
+    if (ret < 0)
+        return ret;
+    else if (ret != packet_size)
+        return s->pb->eof_reached ? AVERROR_EOF : AVERROR(EIO);
 
     if (s->streams[0]->codec->coded_frame) {
         s->streams[0]->codec->coded_frame->interlaced_frame = s1->interlaced_frame;

From 6731776795a8f7e60991c6185480043a2b94a7ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jind=C5=99ich=20Makovi=C4=8Dka?= <makovick@gmail.com>
Date: Sat, 29 Sep 2012 11:16:45 +0200
Subject: [PATCH 268/315] h264: avoid stuck buffer pointer in decode_nal_units
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When decode_nal_units() has previously encountered a NAL_END_SEQUENCE
and there are some junk bytes, but no start codes, left in the input
buffer, buf_index gets stuck 3 bytes before the end of the buffer.

This can trigger an infinite loop in the caller code, e.g. in
try_decode_frame(), as avcodec_decode_video() then keeps returning zero
with 3 bytes of the input packet still available.

With this change, the remaining bytes are skipped so the whole packet gets
consumed.

CC:libav-stable@libav.org

Signed-off-by: Jindřich Makovička <makovick@gmail.com>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
(cherry picked from commit 1a8c6917f68f7378465e18f7615762bfd22704c2)

Conflicts:

	libavcodec/h264.c
---
 libavcodec/h264.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index c8b561d155..b7eacc5cb1 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -7456,7 +7456,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
                     break;
             }
 
-            if(buf_index+3 >= buf_size) break;
+
+            if (buf_index + 3 >= buf_size) {
+                buf_index = buf_size;
+                break;
+            }
 
             buf_index+=3;
         }

From 5235db68c062581b8625b4a432d96101f1a23f44 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Fri, 28 Sep 2012 14:38:13 +0200
Subject: [PATCH 269/315] mpegaudiodec: fix short_start calculation

The value should always be 3, as follows from the specification.

Fix a stack buffer overflow in exponents_from_scale_factors as reported
by asan. Thanks to Dale Curtis for the sample vector.
(cherry picked from commit 97cfa55eea39cef30abe14682c56c1e4e7f6f10d)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/mpegaudiodec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index e7bbd5d541..c95a571f72 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -173,7 +173,7 @@ void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g){
             else
                 g->long_end = 4; /* 8000 Hz */
 
-            g->short_start = 2 + (s->sample_rate_index != 8);
+            g->short_start = 3;
         } else {
             g->long_end = 0;
             g->short_start = 0;

From 80f89a9b40cce11b38385f92293c8c2cf5eee395 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 19 Sep 2012 11:12:58 -0700
Subject: [PATCH 270/315] tiffenc: Check av_malloc() results.

(cherry picked from commit b92dfb56d4582633571db18c3d904f8602eaa2a6)

Conflicts:

	libavcodec/tiffenc.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/tiffenc.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/libavcodec/tiffenc.c b/libavcodec/tiffenc.c
index 1bc3c82c7e..4d6172fc46 100644
--- a/libavcodec/tiffenc.c
+++ b/libavcodec/tiffenc.c
@@ -304,6 +304,10 @@ static int encode_frame(AVCodecContext * avctx, unsigned char *buf,
 
     strip_sizes = av_mallocz(sizeof(*strip_sizes) * strips);
     strip_offsets = av_mallocz(sizeof(*strip_offsets) * strips);
+    if (!strip_sizes || !strip_offsets) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
 
     bytes_per_row = (((s->width - 1)/s->subsampling[0] + 1) * s->bpp
                     * s->subsampling[0] * s->subsampling[1] + 7) >> 3;
@@ -311,6 +315,7 @@ static int encode_frame(AVCodecContext * avctx, unsigned char *buf,
         yuv_line = av_malloc(bytes_per_row);
         if (yuv_line == NULL){
             av_log(s->avctx, AV_LOG_ERROR, "Not enough memory\n");
+            ret = AVERROR(ENOMEM);
             goto fail;
         }
     }
@@ -323,6 +328,10 @@ static int encode_frame(AVCodecContext * avctx, unsigned char *buf,
 
         zlen = bytes_per_row * s->rps;
         zbuf = av_malloc(zlen);
+        if (!zbuf) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
         strip_offsets[0] = ptr - buf;
         zn = 0;
         for (j = 0; j < s->rps; j++) {
@@ -347,8 +356,13 @@ static int encode_frame(AVCodecContext * avctx, unsigned char *buf,
     } else
 #endif
     {
-        if(s->compr == TIFF_LZW)
+        if (s->compr == TIFF_LZW) {
             s->lzws = av_malloc(ff_lzw_encode_state_size);
+            if (!s->lzws) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
         for (i = 0; i < s->height; i++) {
             if (strip_sizes[i / s->rps] == 0) {
                 if(s->compr == TIFF_LZW){

From 1f1b2f18067bef0339c5b223a06421ed200ed60c Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Fri, 26 Oct 2012 22:55:04 +0200
Subject: [PATCH 271/315] yuv4mpeg: reject unsupported codecs

The muxer already rejects unsupported pixel formats; also reject
unsupported codecs to prevent dangerous misuse.
(cherry picked from commit 424b1e764263b1493de4c34365ef367ddae856db)

Conflicts:

	libavformat/yuv4mpeg.c

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/yuv4mpeg.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavformat/yuv4mpeg.c b/libavformat/yuv4mpeg.c
index 1f3892fc60..19e8be7636 100644
--- a/libavformat/yuv4mpeg.c
+++ b/libavformat/yuv4mpeg.c
@@ -152,6 +152,11 @@ static int yuv4_write_header(AVFormatContext *s)
     if (s->nb_streams != 1)
         return AVERROR(EIO);
 
+    if (s->streams[0]->codec->codec_id != CODEC_ID_RAWVIDEO) {
+        av_log(s, AV_LOG_ERROR, "ERROR: Only rawvideo supported.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     if (s->streams[0]->codec->pix_fmt == PIX_FMT_YUV411P) {
         av_log(s, AV_LOG_ERROR, "Warning: generating rarely used 4:1:1 YUV stream, some mjpegtools might not work.\n");
     }

From 2e1474fd9988e0d8749b8ba2eb46a945ef37dfb7 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-libav@jannau.net>
Date: Fri, 23 Nov 2012 14:05:36 +0100
Subject: [PATCH 272/315] lavf: avoid integer overflow in
 ff_compute_frame_duration()

If the numerator would overflow, scale down the denominator instead;
this loses some precision but avoids the overflow. Fixes an assert caused
by a negative frame duration in the fuzzed sample nasa-8s2.ts_s202310.

CC: libav-stable@libav.org
(cherry picked from commit 7709ce029a7bc101b9ac1ceee607cda10dcb89dc)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/utils.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 223d567f75..271502327f 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -680,7 +680,10 @@ static void compute_frame_duration(int *pnum, int *pden, AVStream *st,
             *pnum = st->codec->time_base.num;
             *pden = st->codec->time_base.den;
             if (pc && pc->repeat_pict) {
-                *pnum = (*pnum) * (1 + pc->repeat_pict);
+                if (*pnum > INT_MAX / (1 + pc->repeat_pict))
+                    *pden /= 1 + pc->repeat_pict;
+                else
+                    *pnum *= 1 + pc->repeat_pict;
             }
         }
         break;

From c3761b661874174a63aded4933a62aa1246f9339 Mon Sep 17 00:00:00 2001
From: Dale Curtis <dalecurtis@chromium.org>
Date: Wed, 7 Mar 2012 14:26:58 -0800
Subject: [PATCH 273/315] Fix uninitialized reads on malformed ogg files.

The ogg decoder wasn't padding the input buffer with the appropriate
FF_INPUT_BUFFER_PADDING_SIZE bytes, which led to uninitialized reads in
various pieces of parsing code when they thought they had more data than
they actually did.

Signed-off-by: Dale Curtis <dalecurtis@chromium.org>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
(cherry picked from commit ef0d779706c77ca9007527bd8d41e9400682f4e4)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavformat/oggdec.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 54406f5479..cf1df8425a 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -66,8 +66,7 @@ ogg_save (AVFormatContext * s)
 
     for (i = 0; i < ogg->nstreams; i++){
         struct ogg_stream *os = ogg->streams + i;
-        os->buf = av_malloc (os->bufsize);
-        memset (os->buf, 0, os->bufsize);
+        os->buf = av_mallocz (os->bufsize + FF_INPUT_BUFFER_PADDING_SIZE);
         memcpy (os->buf, ost->streams[i].buf, os->bufpos);
     }
 
@@ -166,7 +165,7 @@ ogg_new_stream (AVFormatContext * s, uint32_t serial)
     os = ogg->streams + idx;
     os->serial = serial;
     os->bufsize = DECODER_BUFFER_SIZE;
-    os->buf = av_malloc(os->bufsize);
+    os->buf = av_malloc(os->bufsize + FF_INPUT_BUFFER_PADDING_SIZE);
     os->header = -1;
 
     st = av_new_stream (s, idx);
@@ -182,7 +181,7 @@ static int
 ogg_new_buf(struct ogg *ogg, int idx)
 {
     struct ogg_stream *os = ogg->streams + idx;
-    uint8_t *nb = av_malloc(os->bufsize);
+    uint8_t *nb = av_malloc(os->bufsize + FF_INPUT_BUFFER_PADDING_SIZE);
     int size = os->bufpos - os->pstart;
     if(os->buf){
         memcpy(nb, os->buf + os->pstart, size);
@@ -279,7 +278,7 @@ ogg_read_page (AVFormatContext * s, int *str)
     }
 
     if (os->bufsize - os->bufpos < size){
-        uint8_t *nb = av_malloc (os->bufsize *= 2);
+        uint8_t *nb = av_malloc ((os->bufsize *= 2) + FF_INPUT_BUFFER_PADDING_SIZE);
         memcpy (nb, os->buf, os->bufpos);
         av_free (os->buf);
         os->buf = nb;
@@ -293,6 +292,7 @@ ogg_read_page (AVFormatContext * s, int *str)
     os->granule = gp;
     os->flags = flags;
 
+    memset(os->buf + os->bufpos, 0, FF_INPUT_BUFFER_PADDING_SIZE);
     if (str)
         *str = idx;
 

From fe4409a396d7f577fbcac6c2ff0df3c6eabc3727 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Sat, 22 Dec 2012 17:58:24 +0100
Subject: [PATCH 274/315] oggdec: check memory allocation

(cherry picked from commit ba064ebe48376e199f353ef0b335ed8a39c638c5)

Conflicts:

	libavformat/oggdec.c
---
 libavformat/oggdec.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index cf1df8425a..0bf7db0e52 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -159,8 +159,13 @@ ogg_new_stream (AVFormatContext * s, uint32_t serial)
     AVStream *st;
     struct ogg_stream *os;
 
-    ogg->streams = av_realloc (ogg->streams,
-                               ogg->nstreams * sizeof (*ogg->streams));
+    os = av_realloc (ogg->streams, ogg->nstreams * sizeof (*ogg->streams));
+
+    if (!os)
+        return AVERROR(ENOMEM);
+
+    ogg->streams = os;
+
     memset (ogg->streams + idx, 0, sizeof (*ogg->streams));
     os = ogg->streams + idx;
     os->serial = serial;
@@ -279,6 +284,8 @@ ogg_read_page (AVFormatContext * s, int *str)
 
     if (os->bufsize - os->bufpos < size){
         uint8_t *nb = av_malloc ((os->bufsize *= 2) + FF_INPUT_BUFFER_PADDING_SIZE);
+        if (!nb)
+            return AVERROR(ENOMEM);
         memcpy (nb, os->buf, os->bufpos);
         av_free (os->buf);
         os->buf = nb;

From a49599b1255ec0300cdec1591edf506433407804 Mon Sep 17 00:00:00 2001
From: Janne Grunau <janne-libav@jannau.net>
Date: Wed, 28 Nov 2012 22:17:14 +0100
Subject: [PATCH 275/315] h264: check context state before decoding slice data
 partitions

Fixes mov_h264_aac__Demo_FlagOfOurFathers.mov.SIGSEGV.4e9.656.

Found-by: Mateusz "j00ru" Jurczyk
CC: libav-stable@libav.org
(cherry picked from commit c1fcf563b13051f280db169ba41c6a1b21b25e08)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/h264.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index b7eacc5cb1..a4d26f0977 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -7554,6 +7554,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
             hx->inter_gb_ptr= &hx->inter_gb;
 
             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
+               && s->current_picture_ptr
                && s->context_initialized
                && s->hurry_up < 5
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)

From 7aeb281aa5078726eef5f7db0e7b513932454dc0 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 15 Sep 2011 18:08:52 -0400
Subject: [PATCH 276/315] shorten: check for realloc failure

(cherry picked from commit 9e5e2c2d010c05c10337e9c1ec9d0d61495e0c9c)

Conflicts:

	libavcodec/shorten.c
---
 libavcodec/shorten.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index 053f5c2ed1..9d66d76b4f 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -112,6 +112,8 @@ static av_cold int shorten_decode_init(AVCodecContext * avctx)
 static int allocate_buffers(ShortenContext *s)
 {
     int i, chan;
+    void *tmp_ptr;
+
     for (chan=0; chan<s->channels; chan++) {
         if(FFMAX(1, s->nmean) >= UINT_MAX/sizeof(int32_t)){
             av_log(s->avctx, AV_LOG_ERROR, "nmean too large\n");
@@ -122,9 +124,15 @@ static int allocate_buffers(ShortenContext *s)
             return -1;
         }
 
-        s->offset[chan] = av_realloc(s->offset[chan], sizeof(int32_t)*FFMAX(1, s->nmean));
+        tmp_ptr = av_realloc(s->offset[chan], sizeof(int32_t)*FFMAX(1, s->nmean));
+        if (!tmp_ptr)
+            return AVERROR(ENOMEM);
+        s->offset[chan] = tmp_ptr;
 
-        s->decoded[chan] = av_realloc(s->decoded[chan], sizeof(int32_t)*(s->blocksize + s->nwrap));
+        tmp_ptr = av_realloc(s->decoded[chan], sizeof(int32_t)*(s->blocksize + s->nwrap));
+        if (!tmp_ptr)
+            return AVERROR(ENOMEM);
+        s->decoded[chan] = tmp_ptr;
         for (i=0; i<s->nwrap; i++)
             s->decoded[chan][i] = 0;
         s->decoded[chan] += s->nwrap;
@@ -275,8 +283,15 @@ static int shorten_decode_frame(AVCodecContext *avctx,
     int i, input_buf_size = 0;
     int16_t *samples = data;
     if(s->max_framesize == 0){
+        void *tmp_ptr;
         s->max_framesize= 1024; // should hopefully be enough for the first header
-        s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize);
+        tmp_ptr = av_fast_realloc(s->bitstream, &s->allocated_bitstream_size,
+                                  s->max_framesize);
+        if (!tmp_ptr) {
+            av_log(avctx, AV_LOG_ERROR, "error allocating bitstream buffer\n");
+            return AVERROR(ENOMEM);
+        }
+        s->bitstream = tmp_ptr;
     }
 
     if(1 && s->max_framesize){//FIXME truncated

From 9def5c466648d970f8d3e03d4b3947a6852d9c61 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 25 Dec 2011 12:28:50 +0100
Subject: [PATCH 277/315] shorten: Use separate pointers for the allocated
 memory for decoded samples.

Fixes invalid free() if any of the buffers are not allocated due to either
not decoding a header or an error prior to allocating all buffers.

Fixes CVE-2012-0858
CC: libav-stable@libav.org

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
(cherry picked from commit 204cb29b3c84a74cbcd059d353c70c8bdc567d98)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/shorten.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index 9d66d76b4f..09290fc1ed 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -82,6 +82,7 @@ typedef struct ShortenContext {
     int channels;
 
     int32_t *decoded[MAX_CHANNELS];
+    int32_t *decoded_base[MAX_CHANNELS];
     int32_t *offset[MAX_CHANNELS];
     uint8_t *bitstream;
     int bitstream_size;
@@ -129,13 +130,14 @@ static int allocate_buffers(ShortenContext *s)
             return AVERROR(ENOMEM);
         s->offset[chan] = tmp_ptr;
 
-        tmp_ptr = av_realloc(s->decoded[chan], sizeof(int32_t)*(s->blocksize + s->nwrap));
+        tmp_ptr = av_realloc(s->decoded_base[chan], (s->blocksize + s->nwrap) *
+                             sizeof(s->decoded_base[0][0]));
         if (!tmp_ptr)
             return AVERROR(ENOMEM);
-        s->decoded[chan] = tmp_ptr;
+        s->decoded_base[chan] = tmp_ptr;
         for (i=0; i<s->nwrap; i++)
-            s->decoded[chan][i] = 0;
-        s->decoded[chan] += s->nwrap;
+            s->decoded_base[chan][i] = 0;
+        s->decoded[chan] = s->decoded_base[chan] + s->nwrap;
     }
     return 0;
 }
@@ -523,8 +525,8 @@ static av_cold int shorten_decode_close(AVCodecContext *avctx)
     int i;
 
     for (i = 0; i < s->channels; i++) {
-        s->decoded[i] -= s->nwrap;
-        av_freep(&s->decoded[i]);
+        s->decoded[i] = NULL;
+        av_freep(&s->decoded_base[i]);
         av_freep(&s->offset[i]);
     }
     av_freep(&s->bitstream);

From 4f8f4458a5a837bf58ae3b3662b0ec4278682612 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Fri, 14 Dec 2012 09:55:04 +0100
Subject: [PATCH 278/315] vp56: release frames on error

Fixes CVE-2012-2783

CC: libav-stable@libav.org

(cherry picked from commit f33b5ba63eee96c9d1c7f0e568169cb0c3694238)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 7fd7950174f9f2935fbf5bf1435fd0dc37be5c61)

Conflicts:

	libavcodec/vp56.c
---
 libavcodec/vp56.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index c09dbeb2f8..2b70d2b2f8 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -516,8 +516,14 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         s->modelp = &s->models[is_alpha];
 
         res = s->parse_header(s, buf, remaining_buf_size, &golden_frame);
-        if (!res)
-            return -1;
+        if (!res) {
+            int i;
+            for (i = 0; i < 4; i++) {
+                if (s->frames[i].data[0])
+                    avctx->release_buffer(avctx, &s->frames[i]);
+            }
+            return res;
+        }
 
         if (!is_alpha) {
             p->reference = 1;

From b9500bf864e9b5619f9d3b1331f4487a1a70ecf4 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Thu, 13 Dec 2012 16:20:19 +0100
Subject: [PATCH 279/315] vp6: properly fail on unsupported feature

Interlacing is not supported at all and is mishandled in the normal
code paths, possibly causing buffer management issues.

Fixes: CVE-2012-2783
(cherry picked from commit be75fed9755c1285ba084574aff2d7ee0f81110d)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
(cherry picked from commit 4ede95e69cf964cd46b1e9fcd48da80d8d92c433)

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 libavcodec/vp6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index d9e9711cca..6d0d3b2d6e 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -61,8 +61,8 @@ static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
             return 0;
         s->filter_header = buf[1] & 0x06;
         if (buf[1] & 1) {
-            av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
-            return 0;
+            av_log(s->avctx, AV_LOG_WARNING, "interlacing not supported\n");
+            return AVERROR_PATCHWELCOME;
         }
         if (separated_coeff || !s->filter_header) {
             coeff_offset = AV_RB16(buf+2) - 2;

From 0b25c3b67cef73ab1370db99da906ebc215a3c72 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Thu, 13 Dec 2012 17:53:31 +0100
Subject: [PATCH 280/315] mpeg12: do not decode extradata more than once.

Fixes CVE-2012-2803.

(cherry picked from commit 582368626188c070d4300913c6da5efa4c24cfb2)
(cherry picked from commit 301761792a693a1f3303a2af34a0fb066a03c10c)

Conflicts:

	libavcodec/mpeg12.c
---
 libavcodec/mpeg12.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 9e67ee5c61..95d128214d 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1163,6 +1163,7 @@ typedef struct Mpeg1Context {
     int save_width, save_height;
     AVRational frame_rate_ext;       ///< MPEG-2 specific framerate modificator
 
+    int extradata_decoded;
 } Mpeg1Context;
 
 static av_cold int mpeg_decode_init(AVCodecContext *avctx)
@@ -2299,8 +2300,10 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
 
     s->slice_count= 0;
 
-    if(avctx->extradata && !avctx->frame_number)
+    if (avctx->extradata && !s->extradata_decoded) {
         decode_chunks(avctx, picture, data_size, avctx->extradata, avctx->extradata_size);
+        s->extradata_decoded = 1;
+    }
 
     return decode_chunks(avctx, picture, data_size, buf, buf_size);
 }

From deb650c692317ccc3e5359f5b100c98dc88c514f Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Fri, 15 Feb 2013 19:41:04 +0100
Subject: [PATCH 281/315] Release notes and changelog for 0.5.10

---
 Changelog | 31 +++++++++++++++++++++++++++++++
 RELEASE   | 36 ++++++++++++++++++++++++++++--------
 2 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/Changelog b/Changelog
index b39ecc0817..4611254291 100644
--- a/Changelog
+++ b/Changelog
@@ -1,8 +1,39 @@
 Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
+version 0.5.10:
+
+- mpeg12: do not decode extradata more than once (CVE-2012-2803)
+- vp6: properly fail on unsupported feature (CVE-2012-2783)
+- vp56: release frames on error (CVE-2012-2783)
+- shorten: Use separate pointers for the allocated memory for decoded samples (CVE-2012-0858)
+- shorten: check for realloc failure
+- h264: check context state before decoding slice data partitions
+- oggdec: check memory allocation
+- Fix uninitialized reads on malformed Ogg files
+- lavf: avoid integer overflow in ff_compute_frame_duration()
+- yuv4mpeg: reject unsupported codecs
+- tiffenc: Check av_malloc() results
+- mpegaudiodec: fix short_start calculation
+- h264: avoid stuck buffer pointer in decode_nal_units
+- yuv4mpeg: return proper error codes (Bug 373)
+- avidec: return 0, not packet size from read_packet()
+- cavsdec: check for changing w/h (CVE-2012-2777 and CVE-2012-2784)
+- avidec: use actually read size instead of requested size CVE-2012-2788
+- bytestream: add a new set of bytestream functions with overread checking
+- avsdec: Set dimensions instead of relying on the demuxer (CVE-2012-2801)
+- lavfi: avfilter_merge_formats: handle case where inputs are same
+- bmpdec: only initialize palette for pal8 (Bug 367)
+- Bump version number for the 0.5.10 release
+- lavfi: avfilter_merge_formats: handle case where inputs are same
+- mpegvideo: Don't use ff_mspel_motion() for vc1
+- imgconvert: avoid undefined left shift in avcodec_find_best_pix_fmt
+- nuv: check RTjpeg header for validity
+- vc1dec: add flush function for WMV9 and VC-1 decoders
+
 
 version 0.5.9:
+
 - dpcm: ignore extra unpaired bytes in stereo streams (CVE-2011-3951)
 - h264: Add check for invalid chroma_format_idc (CVE-2012-0851)
 - adpcm: ADPCM Electronic Arts has always two channels (CVE-2012-0852)
diff --git a/RELEASE b/RELEASE
index c164d6b566..05e1cd0892 100644
--- a/RELEASE
+++ b/RELEASE
@@ -213,12 +213,32 @@ of changes please see the Changelog file.
 General notes
 -------------
 
-This mostly maintenance-only release addresses a number a number of bugs
-such as security and compilation issues that have been brought to our
-attention. Among other fixes, this release features includes security
-updates for the DPCM codecs (CVE-2011-3951), H.264 (CVE-2012-0851),
-ADPCM (CVE-2012-0852), and the KMVC decoder (CVE-2011-3952).
+This mostly maintenance-only release addresses a number of bugs such as
+security and compilation issues that have been brought to our
+attention. Among other fixes, this release includes security updates for
+the DPCM codecs (CVE-2011-3951), H.264 (CVE-2012-0851), ADPCM
+(CVE-2012-0852), and the KMVC decoder (CVE-2011-3952).
 
-Distributors and system integrators are encouraged
-to update and share their patches against this branch.  For a full list
-of changes please see the Changelog file or the git commit history.
+Distributors and system integrators are encouraged to update and share
+their patches against this branch. For a full list of changes please see
+the Changelog file or the Git commit history.
+
+
+
+* 0.5.10 Feb 16, 2013
+
+General notes
+-------------
+
+This maintenance-only release addresses a number of bugs such as
+security and compilation issues that have been brought to our
+attention. Among other fixes, this release includes security updates for
+the mpeg12 codecs (CVE-2012-2803), H.264, VP5/VP6 (CVE-2012-2783,
+CVE-2012-2783), shorten (CVE-2012-0858), CAVS (CVE-2012-2777 and
+CVE-2012-2784), AVS (CVE-2012-2801) and a number of additional safe but
+important bugs in other decoders. Additionally, reported bugs in the
+yuv4mpeg (Bug 373) and BMP decoder (Bug 367) have been addressed.
+
+Distributors and system integrators are encouraged to update and share
+their patches against this branch. For a full list of changes please
+see the Changelog file or the Git commit history.

From a23a3dba25448939e6be43c9196f1e6917258e2e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 30 Nov 2012 23:59:40 +0100
Subject: [PATCH 282/315] qdm2: check array index before use, fix out of array
 accesses

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit a7ee6281f7ef1c29284e3a4cadfe0f227ffde1ed)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qdm2.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index e2f1a87033..ba68d02103 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1262,6 +1262,11 @@ static void qdm2_decode_super_block (QDM2Context *q)
     for (i = 0; packet_bytes > 0; i++) {
         int j;
 
+        if (i>=FF_ARRAY_ELEMS(q->sub_packet_list_A)) {
+            SAMPLES_NEEDED_2("too many packet bytes");
+            return;
+        }
+
         q->sub_packet_list_A[i].next = NULL;
 
         if (i > 0) {

From fee26d352a52eb9f7fcd8d9167fb4a5ba015b612 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 29 Nov 2012 15:18:17 +0100
Subject: [PATCH 283/315] roqvideodec: check dimensions validity

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 3ae610451170cd5a28b33950006ff0bd23036845)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/roqvideodec.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/roqvideodec.c b/libavcodec/roqvideodec.c
index c9daec729f..8949b92144 100644
--- a/libavcodec/roqvideodec.c
+++ b/libavcodec/roqvideodec.c
@@ -158,6 +158,12 @@ static av_cold int roq_decode_init(AVCodecContext *avctx)
     RoqContext *s = avctx->priv_data;
 
     s->avctx = avctx;
+
+    if (avctx->width%16 || avctx->height%16) {
+         av_log(avctx, AV_LOG_ERROR, "dimensions not being a multiple of 16 are unsupported\n");
+         return AVERROR_PATCHWELCOME;
+    }
+
     s->width = avctx->width;
     s->height = avctx->height;
     s->last_frame    = &s->frames[0];

From 13093f9767b922661132a3c1f4b5ba2c7338b660 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 25 Jan 2013 06:11:59 +0100
Subject: [PATCH 284/315] vqavideo: check chunk sizes before reading chunks

Fixes out of array writes

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit ab6c9332bfa1e20127a16392a0b85a4aa4840889)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vqavideo.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c
index f34a63173b..18fced74f0 100644
--- a/libavcodec/vqavideo.c
+++ b/libavcodec/vqavideo.c
@@ -526,6 +526,11 @@ static void vqa_decode_chunk(VqaContext *s)
         chunk_size = AV_RB32(&s->buf[cbp0_chunk + 4]);
         cbp0_chunk += CHUNK_PREAMBLE_SIZE;
 
+        if (chunk_size > MAX_CODEBOOK_SIZE - s->next_codebook_buffer_index) {
+            av_log(s->avctx, AV_LOG_ERROR, "cbp0 chunk too large (0x%X bytes)\n", chunk_size);
+            return AVERROR_INVALIDDATA;
+        }
+
         /* accumulate partial codebook */
         memcpy(&s->next_codebook_buffer[s->next_codebook_buffer_index],
             &s->buf[cbp0_chunk], chunk_size);
@@ -549,6 +554,11 @@ static void vqa_decode_chunk(VqaContext *s)
         chunk_size = AV_RB32(&s->buf[cbpz_chunk + 4]);
         cbpz_chunk += CHUNK_PREAMBLE_SIZE;
 
+        if (chunk_size > MAX_CODEBOOK_SIZE - s->next_codebook_buffer_index) {
+            av_log(s->avctx, AV_LOG_ERROR, "cbpz chunk too large (0x%X bytes)\n", chunk_size);
+            return AVERROR_INVALIDDATA;
+        }
+
         /* accumulate partial codebook */
         memcpy(&s->next_codebook_buffer[s->next_codebook_buffer_index],
             &s->buf[cbpz_chunk], chunk_size);

From d34cfb33afb8c6e3c349e71d752beb9234afbcf0 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 17 Feb 2013 03:55:49 +0100
Subject: [PATCH 285/315] update for 0.5.11

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 RELEASE    | 2 +-
 VERSION    | 2 +-
 cmdutils.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/RELEASE b/RELEASE
index 0b9bf42c83..2677b999a8 100644
--- a/RELEASE
+++ b/RELEASE
@@ -235,7 +235,7 @@ the Changelog file or the Git commit history.
 
 
-* 0.5.10 Feb 16, 2013
+* 0.5.11 Feb 17, 2013
 
 General notes
 -------------
diff --git a/VERSION b/VERSION
index 50c76ef872..69626fb929 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.10
+0.5.11
diff --git a/cmdutils.c b/cmdutils.c
index f1e5352362..7c4c1a0080 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -50,7 +50,7 @@ AVCodecContext *avctx_opts[CODEC_TYPE_NB];
 AVFormatContext *avformat_opts;
 struct SwsContext *sws_opts;
 
-const int this_year = 2012;
+const int this_year = 2013;
 
 double parse_number_or_die(const char *context, const char *numstr, int type, double min, double max)
 {

From 2abf5eeea6e49d8ec4ca3797363d7a2eaef4dacc Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 17 Feb 2013 09:06:55 +0100
Subject: [PATCH 286/315] update year to 2013

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
---
 cmdutils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmdutils.c b/cmdutils.c
index 108eaccdd1..7c4c1a0080 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -50,7 +50,7 @@ AVCodecContext *avctx_opts[CODEC_TYPE_NB];
 AVFormatContext *avformat_opts;
 struct SwsContext *sws_opts;
 
-const int this_year = 2009;
+const int this_year = 2013;
 
 double parse_number_or_die(const char *context, const char *numstr, int type, double min, double max)
 {

From 01c90eea6cc08bab89322175b4c50ae8a96fcc2b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 20 Feb 2013 00:47:13 +0100
Subject: [PATCH 287/315] vqavideo: fix return type

Fixes Ticket2281

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 1fd86f9a2136165205b0370d5a6e916499f1da7f)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vqavideo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c
index 18fced74f0..9edee58b2a 100644
--- a/libavcodec/vqavideo.c
+++ b/libavcodec/vqavideo.c
@@ -528,7 +528,7 @@ static void vqa_decode_chunk(VqaContext *s)
 
         if (chunk_size > MAX_CODEBOOK_SIZE - s->next_codebook_buffer_index) {
             av_log(s->avctx, AV_LOG_ERROR, "cbp0 chunk too large (0x%X bytes)\n", chunk_size);
-            return AVERROR_INVALIDDATA;
+            return;
         }
 
         /* accumulate partial codebook */
@@ -556,7 +556,7 @@ static void vqa_decode_chunk(VqaContext *s)
 
         if (chunk_size > MAX_CODEBOOK_SIZE - s->next_codebook_buffer_index) {
             av_log(s->avctx, AV_LOG_ERROR, "cbpz chunk too large (0x%X bytes)\n", chunk_size);
-            return AVERROR_INVALIDDATA;
+            return;
         }
 
         /* accumulate partial codebook */

From c2d3f06882abb0298a76a264b423d7c28155bb70 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 30 Jan 2013 22:56:45 +0100
Subject: [PATCH 288/315] wma: check byte_offset_bits

Fixes assertion failure

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 984add64a41c3296a8a82051cc90bff2eb449609)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/wma.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/wma.c b/libavcodec/wma.c
index ff01f49b5c..12faf273e5 100644
--- a/libavcodec/wma.c
+++ b/libavcodec/wma.c
@@ -133,6 +133,10 @@ int ff_wma_init(AVCodecContext * avctx, int flags2)
 
     bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate);
     s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2;
+    if (s->byte_offset_bits + 3 > MIN_CACHE_BITS) {
+        av_log(avctx, AV_LOG_ERROR, "byte_offset_bits %d is too large\n", s->byte_offset_bits);
+        return AVERROR_PATCHWELCOME;
+    }
 
     /* compute high frequency value and choose if noise coding should
        be activated */

From 5f5bf9faf902d48162d92876b150c7441e9d7cca Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 20 Feb 2013 02:24:30 +0100
Subject: [PATCH 289/315] avcodec_align_dimensions2: Ensure cinepak has large
 enough buffers.

This is partly redundant with the following patches, but it's safer

Found-by: u-bo1b@0w.se
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f5c00b347dc76285c639d9878a014c40395c5228)

Conflicts:

	libavcodec/utils.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 2b6f3be08250683407c7a9846d7133b116661eae)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/utils.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index acfafa1344..99bfae4a0e 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -165,9 +165,10 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
     case PIX_FMT_PAL8:
     case PIX_FMT_BGR8:
     case PIX_FMT_RGB8:
-        if(s->codec_id == CODEC_ID_SMC){
-            w_align=4;
-            h_align=4;
+        if (s->codec_id == CODEC_ID_SMC ||
+            s->codec_id == CODEC_ID_CINEPAK) {
+            w_align = 4;
+            h_align = 4;
         }
         break;
     case PIX_FMT_BGR24:
@@ -176,6 +177,12 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
             h_align=4;
         }
         break;
+    case PIX_FMT_RGB24:
+        if (s->codec_id == CODEC_ID_CINEPAK) {
+            w_align = 4;
+            h_align = 4;
+        }
+        break;
     default:
         w_align= 1;
         h_align= 1;

From 81b754b1e42eb4eceee67f77cb07cd0f38bb4086 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 28 Feb 2013 03:54:37 +0100
Subject: [PATCH 290/315] Update for 0.5.12

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 69626fb929..9d6c1754eb 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.11
+0.5.12

From 588571d41ddb03f067929c31f66d3301992dde48 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 9 May 2013 17:53:33 +0200
Subject: [PATCH 291/315] Bump version number for the 0.5.11 release

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 50c76ef872..69626fb929 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.10
+0.5.11

From fde0b7d91c9cbcc427f87c2651f39d0075c66efe Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 22 Aug 2013 01:07:32 +0200
Subject: [PATCH 292/315] avcodec/rpza: Perform pointer advance and checks
 before using the pointers

Fixes out of array accesses
Fixes Ticket2850

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 3819db745da2ac7fb3faacb116788c32f4753f34)

Conflicts:

	libavcodec/rpza.c
(cherry picked from commit edba432b8b01d68c22e70a508f47553359f59fb5)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/rpza.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/rpza.c b/libavcodec/rpza.c
index 27ed71f937..4a0025ba63 100644
--- a/libavcodec/rpza.c
+++ b/libavcodec/rpza.c
@@ -84,7 +84,7 @@ static void rpza_decode_stream(RpzaContext *s)
     unsigned short *pixels = (unsigned short *)s->frame.data[0];
 
     int row_ptr = 0;
-    int pixel_ptr = 0;
+    int pixel_ptr = -4;
     int block_ptr;
     int pixel_x, pixel_y;
     int total_blocks;
@@ -140,6 +140,7 @@ static void rpza_decode_stream(RpzaContext *s)
             colorA = AV_RB16 (&s->buf[stream_ptr]);
             stream_ptr += 2;
             while (n_blocks--) {
+                ADVANCE_BLOCK()
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     for (pixel_x = 0; pixel_x < 4; pixel_x++){
@@ -148,7 +149,6 @@ static void rpza_decode_stream(RpzaContext *s)
                     }
                     block_ptr += row_inc;
                 }
-                ADVANCE_BLOCK();
             }
             break;
 
@@ -185,6 +185,7 @@ static void rpza_decode_stream(RpzaContext *s)
             color4[2] |= ((21 * ta + 11 * tb) >> 5);
 
             while (n_blocks--) {
+                ADVANCE_BLOCK();
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     index = s->buf[stream_ptr++];
@@ -195,12 +196,12 @@ static void rpza_decode_stream(RpzaContext *s)
                     }
                     block_ptr += row_inc;
                 }
-                ADVANCE_BLOCK();
             }
             break;
 
         /* Fill block with 16 colors */
         case 0x00:
+            ADVANCE_BLOCK();
             block_ptr = row_ptr + pixel_ptr;
             for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                 for (pixel_x = 0; pixel_x < 4; pixel_x++){
@@ -214,7 +215,6 @@ static void rpza_decode_stream(RpzaContext *s)
                 }
                 block_ptr += row_inc;
             }
-            ADVANCE_BLOCK();
             break;
 
         /* Unknown opcode */

From 31f9e849a88b23d7abeedc227fba3016bb440169 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 20 May 2013 04:00:30 +0200
Subject: [PATCH 293/315] matroska_read_seek: Fix used streams for subtitle
 index compensation

Might fix Ticket1907 (I have no test case, so I can't test)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 4758e32a6c48044f77102a49110c79b4f338f648)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/matroskadec.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 7350562402..21f1b5d3e0 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1820,10 +1820,11 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
         if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
             && !tracks[i].stream->discard != AVDISCARD_ALL) {
             index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD);
-            if (index_sub >= 0
-                && st->index_entries[index_sub].pos < st->index_entries[index_min].pos
-                && st->index_entries[index].timestamp - st->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
-                index_min = index_sub;
+            while(index_sub >= 0
+                  && index_min >= 0
+                  && tracks[i].stream->index_entries[index_sub].pos < st->index_entries[index_min].pos
+                  && st->index_entries[index].timestamp - tracks[i].stream->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
+                index_min--;
         }
     }
 

From e7484d54252d3442b64ed09770d4c84c44e104e9 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 30 Aug 2013 23:40:47 +0200
Subject: [PATCH 294/315] avcodec/dsputil: fix signedness in sizeof()
 comparisons

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 454a11a1c9c686c78aa97954306fb63453299760)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/dsputil.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 86a6c37793..4645e79bf1 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3497,7 +3497,7 @@ static void clear_blocks_c(DCTELEM *blocks)
 
 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
     long i;
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
         long a = *(long*)(src+i);
         long b = *(long*)(dst+i);
         *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
@@ -3508,7 +3508,7 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
 
 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     long i;
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
         long a = *(long*)(src1+i);
         long b = *(long*)(src2+i);
         *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
@@ -3533,7 +3533,7 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
         }
     }else
 #endif
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
         long a = *(long*)(src1+i);
         long b = *(long*)(src2+i);
         *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);

From 617a9eedc654bb3bdd6122f036ab5ec6aa491224 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 9 Sep 2013 17:58:18 +0200
Subject: [PATCH 295/315] avcodec/ffv1enc: update buffer check for 16bps

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 3728603f1854b5c79d1a64dd3b41b80640ef1e7f)

Conflicts:

	libavcodec/ffv1enc.c
(cherry picked from commit c900c6e5c26cd86cf34f9c8d4347cedbd01f3935)
---
 libavcodec/ffv1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index ccfcb62dc4..db68a18ab4 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -360,7 +360,7 @@ static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], in
     int run_mode=0;
 
     if(s->ac){
-        if(c->bytestream_end - c->bytestream < w*20){
+        if(c->bytestream_end - c->bytestream < w*35){
             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
             return -1;
         }

From b012da4019e3991a9b274e99eebf7e7de41b035e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 Sep 2013 02:51:05 +0200
Subject: [PATCH 296/315] update for 0.5.13

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 9d6c1754eb..964783a811 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.12
+0.5.13

From 04fb6bb9155a5b1857027b78728badec72734c2e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 26 Sep 2013 21:03:48 +0200
Subject: [PATCH 297/315] avcodec/parser: reset indexes on realloc failure

Fixes Ticket2982

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f31011e9abfb2ae75bb32bc44e2c34194c8dc40a)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/parser.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/libavcodec/parser.c b/libavcodec/parser.c
index d738a62b83..1a8f4cff06 100644
--- a/libavcodec/parser.c
+++ b/libavcodec/parser.c
@@ -253,8 +253,10 @@ int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_s
     if(next == END_NOT_FOUND){
         void* new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
 
-        if(!new_buffer)
+        if(!new_buffer) {
+            pc->index = 0;
             return AVERROR(ENOMEM);
+        }
         pc->buffer = new_buffer;
         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
         pc->index += *buf_size;
@@ -267,9 +269,11 @@ int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_s
     /* append to buffer */
     if(pc->index){
         void* new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
-
-        if(!new_buffer)
+        if(!new_buffer) {
+            pc->overread_index =
+            pc->index = 0;
             return AVERROR(ENOMEM);
+        }
         pc->buffer = new_buffer;
         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
         pc->index = 0;

From 02ac859dfe1febbe467a84b1edef6bf8c33b9ffb Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 30 Oct 2013 23:27:28 +0100
Subject: [PATCH 298/315] avcodec/jpeglsdec: check err value for
 ls_get_code_runterm()

Fixes infinite loop
Fixes Ticket3086

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit cc0e47b55096361723b364afa43b79a3f5619cdc)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/jpeglsdec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index fdf4c4c424..1eb8c05f51 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -143,6 +143,8 @@ static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state, int RI
         ret = ret >> 1;
     }
 
+    if(FFABS(ret) > 0xFFFF)
+        return -0x10000;
     /* update state */
     state->A[Q] += FFABS(ret) - RItype;
     ret *= state->twonear;

From 43aae004557294ffe3487bb4b194c3eb72beb27b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 20 Jan 2014 18:08:18 +0100
Subject: [PATCH 299/315] avcodec/vmnc: Check that rectangles are within the
 picture

Prevents out of array accesses with CODEC_FLAG_EMU_EDGE

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 6ba02602aa7fc7d38db582e75b8b093fb3c1608d)

Conflicts:

	libavcodec/vmnc.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 7c17207ab9acfaa934e8feb8fba90765c9d0b989)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/vmnc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/vmnc.c b/libavcodec/vmnc.c
index c7efaf8a2f..aafd89c30b 100644
--- a/libavcodec/vmnc.c
+++ b/libavcodec/vmnc.c
@@ -275,6 +275,11 @@ static int decode_hextile(VmncContext *c, uint8_t* dst, const uint8_t* src, int
                     }
                     xy = *src++;
                     wh = *src++;
+                    if (   (xy >> 4) + (wh >> 4) + 1 > w - i
+                        || (xy & 0xF) + (wh & 0xF)+1 > h - j) {
+                        av_log(c->avctx, AV_LOG_ERROR, "Rectangle outside picture\n");
+                        return AVERROR_INVALIDDATA;
+                    }
                     paint_rect(dst2, xy >> 4, xy & 0xF, (wh>>4)+1, (wh & 0xF)+1, fg, bpp, stride);
                 }
             }

From 90c8fa52216b7a9fc83167f791dd7bb1d01bbaf2 Mon Sep 17 00:00:00 2001
From: Dale Curtis <dalecurtis@chromium.org>
Date: Thu, 10 Jan 2013 11:05:29 -0800
Subject: [PATCH 300/315] matroska: Fix use after free

Signed-off-by: Dale Curtis <dalecurtis@chromium.org>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
(cherry picked from commit ae3d41636942cbc0236bad21ad06c65f4eb0f096)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/matroskadec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 21f1b5d3e0..6cf9028135 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1503,6 +1503,7 @@ static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
  */
 static void matroska_clear_queue(MatroskaDemuxContext *matroska)
 {
+    matroska->prev_pkt = NULL;
     if (matroska->packets) {
         int n;
         for (n = 0; n < matroska->num_packets; n++) {

From 974c2ad87c348a0116a40758ab47ecf2c8d132d2 Mon Sep 17 00:00:00 2001
From: Xi Wang <xi.wang@gmail.com>
Date: Fri, 15 Mar 2013 06:59:22 -0400
Subject: [PATCH 301/315] lzo: fix overflow checking in copy_backptr()

The check `src > dst' in the form `&c->out[-back] > c->out' invokes
pointer overflow, which is undefined behavior in C.

Remove the check.  Also replace `&c->out[-back] < c->out_start' with
a safe form `c->out - c->out_start < back' to avoid overflow.

CC: libav-stable@libav.org

Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
(cherry picked from commit ca6c3f2c53be70aa3c38e8f1292809db89ea1ba6)

Conflicts:

	libavutil/lzo.c
(cherry picked from commit ff712a262d317f5bd6fc9552cd837508e584a565)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/lzo.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavutil/lzo.c b/libavutil/lzo.c
index 83fa9bfbaf..8cb26370ec 100644
--- a/libavutil/lzo.c
+++ b/libavutil/lzo.c
@@ -118,10 +118,10 @@ static inline void memcpy_backptr(uint8_t *dst, int back, int cnt);
  * cnt > back is valid, this will copy the bytes we just copied,
  * thus creating a repeating pattern with a period length of back.
  */
-static inline void copy_backptr(LZOContext *c, int back, int cnt) {
-    register const uint8_t *src = &c->out[-back];
-    register uint8_t *dst = c->out;
-    if (src < c->out_start || src > dst) {
+static inline void copy_backptr(LZOContext *c, int back, int cnt)
+{
+    register uint8_t *dst       = c->out;
+    if (dst - c->out_start < back) {
         c->error |= AV_LZO_INVALID_BACKPTR;
         return;
     }

From 24a0273cb86ec0b8bf17c71e7f426c3aa9e4989f Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 20 Jun 2014 03:15:28 +0200
Subject: [PATCH 302/315] avutil/lzo: Fix integer overflow

Embargoed-till: 2014-06-27 requested by researcher, but embargo broken by libav today (git and mailing list)

Fixes: LMS-2014-06-16-4
Found-by: "Don A. Bailey" <donb@securitymouse.com>
See: ccda51b14c0fcae2fad73a24872dce75a7964996
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit d6af26c55c1ea30f85a7d9edbc373f53be1743ee)

Conflicts:

	libavutil/lzo.c
(cherry picked from commit 7b5c706494a775b2b0d0e0a38448610802eef8f4)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavutil/lzo.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavutil/lzo.c b/libavutil/lzo.c
index 8cb26370ec..e284fa8f26 100644
--- a/libavutil/lzo.c
+++ b/libavutil/lzo.c
@@ -62,7 +62,13 @@ static inline int get_byte(LZOContext *c) {
 static inline int get_len(LZOContext *c, int x, int mask) {
     int cnt = x & mask;
     if (!cnt) {
-        while (!(x = get_byte(c))) cnt += 255;
+        while (!(x = get_byte(c))) {
+            if (cnt >= INT_MAX - 1000) {
+                c->error |= AV_LZO_ERROR;
+                break;
+            }
+            cnt += 255;
+        }
         cnt += mask + x;
     }
     return cnt;

From a81f72e482312c9f958737f675dc67d00c05b2b1 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 20 Jul 2014 18:34:16 +0200
Subject: [PATCH 303/315] Update for 0.5.14

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 VERSION    | 2 +-
 cmdutils.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/VERSION b/VERSION
index 964783a811..83ac1cc02f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.13
+0.5.14
diff --git a/cmdutils.c b/cmdutils.c
index 7c4c1a0080..8534c60aba 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -50,7 +50,7 @@ AVCodecContext *avctx_opts[CODEC_TYPE_NB];
 AVFormatContext *avformat_opts;
 struct SwsContext *sws_opts;
 
-const int this_year = 2013;
+const int this_year = 2014;
 
 double parse_number_or_die(const char *context, const char *numstr, int type, double min, double max)
 {

From 37140360112135990e21a41ec12c41fdf5987f92 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 2 Oct 2014 23:17:21 +0200
Subject: [PATCH 304/315] avcodec/jpeglsdec: Check run value more completely in
 ls_decode_line()

Previously the run value could have been too large by 1.
Fixes out of array access
Fixes: asan_heap-oob_12240f5_1_asan_heap-oob_12240f5_448_t8c1e3.jls
Fixes: asan_heap-oob_12240f5_1_asan_heap-oob_12240f5_448_t8nde0.jls
Fixes: asan_heap-oob_12240fa_1_asan_heap-oob_12240fa_448_t16e3.jls

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 06e7d58410a17dc72c30ee7f3145fcacc425f4f2)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/jpeglsdec.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index 1eb8c05f51..1079fc811f 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -205,6 +205,11 @@ static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s, void *
                 x += stride;
             }
 
+            if (x >= w) {
+                av_log(NULL, AV_LOG_ERROR, "run overflow\n");
+                return;
+            }
+
             /* decode run termination value */
             Rb = R(last, x);
             RItype = (FFABS(Ra - Rb) <= state->near) ? 1 : 0;

From f7170c48328d5d1b22a2d5e7e8888fab37da37b5 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Oct 2014 01:50:27 +0200
Subject: [PATCH 305/315] avcodec/mjpegdec: check bits per pixel for changes
 similar to dimensions

Fixes out of array accesses
Fixes: asan_heap-oob_16668e9_2_asan_heap-oob_16668e9_346_miss_congeniality_pegasus_mjpg.avi

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 5c378d6a6df8243f06c87962b873bd563e58cd39)

Conflicts:

	libavcodec/mjpegdec.c
(cherry picked from commit 94371a404c663c3dae3d542fa43951567ab67f82)

Conflicts:

	libavcodec/mjpegdec.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mjpegdec.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index e5c9f38b93..cbaebb2f30 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -193,16 +193,16 @@ int ff_mjpeg_decode_dht(MJpegDecodeContext *s)
 
 int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
 {
-    int len, nb_components, i, width, height, pix_fmt_id;
+    int len, nb_components, i, width, height, bits, pix_fmt_id;
 
     /* XXX: verify len field validity */
     len = get_bits(&s->gb, 16);
-    s->bits= get_bits(&s->gb, 8);
+    bits= get_bits(&s->gb, 8);
 
-    if(s->pegasus_rct) s->bits=9;
-    if(s->bits==9 && !s->pegasus_rct) s->rct=1;    //FIXME ugly
+    if(s->pegasus_rct) bits=9;
+    if(bits==9 && !s->pegasus_rct) s->rct=1;    //FIXME ugly
 
-    if (s->bits != 8 && !s->lossless){
+    if (bits != 8 && !s->lossless){
         av_log(s->avctx, AV_LOG_ERROR, "only 8 bits/component accepted\n");
         return -1;
     }
@@ -222,7 +222,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     if (nb_components <= 0 ||
         nb_components > MAX_COMPONENTS)
         return -1;
-    if (s->ls && !(s->bits <= 8 || nb_components == 1)){
+    if (s->ls && !(bits <= 8 || nb_components == 1)){
         av_log(s->avctx, AV_LOG_ERROR, "only <= 8 bits/component or 16-bit gray accepted for JPEG-LS\n");
         return -1;
     }
@@ -255,11 +255,14 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
 
     /* if different size, realloc/alloc picture */
     /* XXX: also check h_count and v_count */
-    if (width != s->width || height != s->height) {
+    if (   width != s->width || height != s->height
+        || bits != s->bits
+       ) {
         av_freep(&s->qscale_table);
 
         s->width = width;
         s->height = height;
+        s->bits       = bits;
         s->interlaced = 0;
 
         /* test interlaced mode */

From 9b4507e423e53b6fe2b278fdfb62124721c11dfc Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Oct 2014 14:45:04 +0200
Subject: [PATCH 306/315] avcodec/mmvideo: Bounds check 2nd line of HHV Intra
 blocks

Fixes out of array access
Fixes: asan_heap-oob_4da4f3_8_asan_heap-oob_4da4f3_419_scene1a.mm

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 8b0e96e1f21b761ca15dbb470cd619a1ebf86c3e)

Conflicts:

	libavcodec/mmvideo.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/mmvideo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mmvideo.c b/libavcodec/mmvideo.c
index 238b991a8d..d45564c2cf 100644
--- a/libavcodec/mmvideo.c
+++ b/libavcodec/mmvideo.c
@@ -104,7 +104,7 @@ static void mm_decode_intra(MmContext * s, int half_horiz, int half_vert, const
 
         if (color) {
             memset(s->frame.data[0] + y*s->frame.linesize[0] + x, color, run_length);
-            if (half_vert)
+            if (half_vert && y + half_vert < s->avctx->height)
                 memset(s->frame.data[0] + (y+1)*s->frame.linesize[0] + x, color, run_length);
         }
         x+= run_length;

From b2f2cbdb1caf18e61b5b2666ae29afcf310c901e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Oct 2014 20:15:52 +0200
Subject: [PATCH 307/315] avcodec/gifdec: factorize interleave end handling out

Also change it to a loop, so the pass is advanced repeatedly until y1 is below height.
Fixes out of array access
Fixes: asan_heap-oob_ca5410_8_asan_heap-oob_ca5410_97_ID_LSD_Size_Less_Then_Data_Inter_3.gif

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 8f1457864be8fb9653643519dea1c6492f1dde57)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/gifdec.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/libavcodec/gifdec.c b/libavcodec/gifdec.c
index d24550c846..49ee7e0e8d 100644
--- a/libavcodec/gifdec.c
+++ b/libavcodec/gifdec.c
@@ -125,26 +125,21 @@ static int gif_read_image(GifState *s)
             case 1:
                 y1 += 8;
                 ptr += linesize * 8;
-                if (y1 >= height) {
-                    y1 = pass ? 2 : 4;
-                    ptr = ptr1 + linesize * y1;
-                    pass++;
-                }
                 break;
             case 2:
                 y1 += 4;
                 ptr += linesize * 4;
-                if (y1 >= height) {
-                    y1 = 1;
-                    ptr = ptr1 + linesize;
-                    pass++;
-                }
                 break;
             case 3:
                 y1 += 2;
                 ptr += linesize * 2;
                 break;
             }
+            while (y1 >= height) {
+                y1 = 4 >> pass;
+                ptr = ptr1 + linesize * y1;
+                pass++;
+            }
         } else {
             ptr += linesize;
         }

From eac21ee7ba0f9a29f96a0abd219f00075cbd30d7 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Oct 2014 21:08:52 +0200
Subject: [PATCH 308/315] avcodec/qpeg: fix off by 1 error in MV bounds check

Fixes out of array access
Fixes: asan_heap-oob_153760f_4_asan_heap-oob_1d7a4cf_164_VWbig6.avi

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit dd3bfe3cc1ca26d0fff3a3baf61a40207032143f)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/qpeg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/qpeg.c b/libavcodec/qpeg.c
index aa8f69c0cf..454c371ed5 100644
--- a/libavcodec/qpeg.c
+++ b/libavcodec/qpeg.c
@@ -165,7 +165,7 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size,
 
                     /* check motion vector */
                     if ((me_x + filled < 0) || (me_x + me_w + filled > width) ||
-                       (height - me_y - me_h < 0) || (height - me_y > orig_height) ||
+                       (height - me_y - me_h < 0) || (height - me_y >= orig_height) ||
                        (filled + me_w > width) || (height - me_h < 0))
                         av_log(NULL, AV_LOG_ERROR, "Bogus motion vector (%i,%i), block size %ix%i at %i,%i\n",
                                me_x, me_y, me_w, me_h, filled, height);

From 7128f67c3f2024b2fd6eb6ad80ea212f855a7929 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 3 Oct 2014 22:50:45 +0200
Subject: [PATCH 309/315] avcodec/smc: fix off by 1 error

Fixes out of array access
Fixes: asan_heap-oob_1685bf0_5_asan_heap-oob_1f35116_430_smc.mov

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit c727401aa9d62335e89d118a5b4e202edf39d905)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/smc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/smc.c b/libavcodec/smc.c
index d0102eaa9c..c640fb2195 100644
--- a/libavcodec/smc.c
+++ b/libavcodec/smc.c
@@ -69,7 +69,7 @@ typedef struct SmcContext {
         row_ptr += stride * 4; \
     } \
     total_blocks--; \
-    if (total_blocks < 0) \
+    if (total_blocks < 0 + !!n_blocks) \
     { \
         av_log(s->avctx, AV_LOG_INFO, "warning: block counter just went negative (this should not happen)\n"); \
         return; \

From b8129b1a7ab959cf2822e43377084d884d791058 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 4 Oct 2014 04:29:40 +0200
Subject: [PATCH 310/315] avformat/mpegts: Check desc_len / get8() return code

Fixes out of array read
Fixes: signal_sigsegv_844d59_10_signal_sigsegv_a17bb7_366_mpegts_mpeg2video_mp2_dvbsub_topfield.rec

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit c3d7f00ee3e09801f56f25db8b5961f25e842bd2)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavformat/mpegts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 33e618c058..c3045d9237 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -769,7 +769,7 @@ static void sdt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
                 break;
             desc_len = get8(&p, desc_list_end);
             desc_end = p + desc_len;
-            if (desc_end > desc_list_end)
+            if (desc_len < 0 || desc_end > desc_list_end)
                 break;
 #ifdef DEBUG_SI
             av_log(ts->stream, AV_LOG_DEBUG, "tag: 0x%02x len=%d\n",

From 831416692b81586f6ef170e2fc021672528bd352 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 28 Oct 2014 15:26:42 +0100
Subject: [PATCH 311/315] avcodec/dxa: check dimensions

Fixes out of array access
Fixes: asan_heap-oob_11222fb_21_020.dxa
Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit e70312dfc22c4e54d5716f28f28db8f99c74cc90)

Conflicts:

	libavcodec/dxa.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/dxa.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/dxa.c b/libavcodec/dxa.c
index e415da7187..6d754980c7 100644
--- a/libavcodec/dxa.c
+++ b/libavcodec/dxa.c
@@ -290,6 +290,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
 {
     DxaDecContext * const c = avctx->priv_data;
 
+    if (avctx->width%4 || avctx->height%4) {
+        av_log(avctx, AV_LOG_ERROR, "dimensions are not a multiple of 4");
+        return AVERROR_INVALIDDATA;
+    }
+
     c->avctx = avctx;
     avctx->pix_fmt = PIX_FMT_PAL8;
 

From 473b0160962757c2fef9a6fedc3ff0ddeffdeef9 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 26 Nov 2014 15:45:47 +0100
Subject: [PATCH 312/315] avcodec/pngdec: Check IHDR/IDAT order

Fixes out of array access
Fixes: asan_heap-oob_20a6c26_2690_cov_3434532168_mail.png
Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 79ceaf827be0b070675d4cd0a55c3386542defd8)

Conflicts:

	libavcodec/pngdec.c
---
 libavcodec/pngdec.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index d583a523b0..e9d80f89a9 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -426,6 +426,12 @@ static int decode_frame(AVCodecContext *avctx,
         case MKTAG('I', 'H', 'D', 'R'):
             if (length != 13)
                 goto fail;
+
+            if (s->state & PNG_IDAT) {
+                av_log(avctx, AV_LOG_ERROR, "IHDR after IDAT\n");
+                goto fail;
+            }
+
             s->width = bytestream_get_be32(&s->bytestream);
             s->height = bytestream_get_be32(&s->bytestream);
             if(avcodec_check_dimensions(avctx, s->width, s->height)){

From e74795e54108e6540adf180a5bd4f59023cd16a4 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 29 Nov 2012 15:56:05 +0100
Subject: [PATCH 313/315] huffyuvdec: check width more completely, avoid out of
 array accesses

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit 6abb9a901fca27da14d4fffbb01948288b5da3ba)

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libavcodec/huffyuv.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index aad9168912..72dd52ee06 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -555,7 +555,10 @@ s->bgr32=1;
     default:
         assert(0);
     }
-
+    if (s->predictor == MEDIAN && avctx->pix_fmt == PIX_FMT_YUV422P && avctx->width%4) {
+        av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 4 this colorspace and predictor\n");
+        return AVERROR_INVALIDDATA;
+    }
     alloc_temp(s);
 
 //    av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_coded_sample, s->interlaced);

From 15d6b44ddcce43c54284c8a73b8897b33d2d6f71 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 28 Nov 2014 20:27:27 +0100
Subject: [PATCH 314/315] update for 0.5.15

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 83ac1cc02f..c5f3c9c45e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.14
+0.5.15

From b256bd3be348fd37108583fdac7db6337994c26d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michael@niedermayer.cc>
Date: Fri, 31 Jul 2015 15:54:38 +0200
Subject: [PATCH 315/315] MAINTAINERS: Remove myself as leader

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
(cherry picked from commit f2c58931e629343f7d68258cc2b2d62c5f501ba5)

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6b9baa2f8d..8cb9d8908e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8,7 +8,6 @@ FFmpeg code.
 Project Leader
 ==============
 
-Michael Niedermayer
   final design decisions